Quick Try at an Introduction to XDP Part 3 (ICMP Echo Reply Section 2)
Server Network Programming
Published: 2025-03-06

In the last article, I wrote a program that returns ICMP Echo Replies using XDP, but it turned out that the ICMP checksum was not calculated correctly.

This time, I would like to describe how to correctly calculate and return the ICMP checksum.

Please praise me for writing for two consecutive days without being lazy.

ICMP Checksum

  • ICMP Header Structure
    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    |      Type      |     Code      |          Checksum           |
    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    |           Identifier           |        Sequence Number      |
    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    |                             Data                              |
    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    

The ICMP header includes a Checksum, and since we changed the Type from ICMP Echo Request to ICMP Echo Reply and returned the packet, we need to recalculate the value of the Checksum.

The ICMP checksum is calculated by adding the header and the entire data in 16-bit units, with the Checksum field itself treated as zero. Whenever the sum overflows 16 bits, the carry is added back into the low 16 bits (one’s-complement addition). Finally, taking the one’s complement (bit inversion) of the sum yields the final checksum.

The basic calculation method is described in RFC 1071, and the incremental (difference) update used below is detailed in RFC 1624. Be careful when referring to RFC 1141, as its description of the difference calculation is incorrect and has been corrected in RFC 1624.

I’ve had experiences where I wasted time looking at RFC 1141 without realizing it had been updated.

Don’t rely on someone’s translation of an RFC; always read the original text… (A reminder to myself)

Since the values are added in 16-bit units and the one’s complement is taken at the end, we only need to obtain the difference, in 16-bit units, for the modified areas; there is no need to recalculate over all the data.

bpf_csum_diff

When calculating checksums in eBPF programs, there is a function called bpf_csum_diff available. This function makes it easy to perform checksum calculations.

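bpf_csum_diff computes the checksum difference between two memory areas: the data before the change (from) and the data after the change (to). The sizes of both areas must be multiples of 4 bytes. Because only the modified area has to be examined, this allows for efficient updating of ICMP and IP checksums.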

Program to Return ICMP Echo Reply

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/icmp.h>
#include <arpa/inet.h>

// Fold a 32-bit one's-complement accumulator down to 16 bits and return
// the bitwise complement of the folded value (the on-wire checksum).
static __always_inline __u16 csum_fold(__u32 csum)
{
    // First pass: high half + low half (may itself carry into bit 16)
    __u32 folded = (csum >> 16) + (csum & 0xffff);
    // Second pass: add that carry back in; bits above 15 are discarded
    // by the narrowing cast below
    folded += folded >> 16;
    // One's complement of the low 16 bits is the final checksum
    return (__u16)~folded;
}

// Exchange two ETH_ALEN-byte MAC addresses in place.
// a and b are expected to point at distinct, non-overlapping buffers.
static __always_inline void swap_eth_addr(__u8 *a, __u8 *b)
{
    __u8 saved[ETH_ALEN];

    // Park b, overwrite it with a, then restore the parked bytes into a
    __builtin_memcpy(saved, b, ETH_ALEN);
    __builtin_memcpy(b, a, ETH_ALEN);
    __builtin_memcpy(a, saved, ETH_ALEN);
}

// Exchange the two 32-bit IPv4 addresses that a and b point to.
static __always_inline void swap_ip_addr(__u32 *a, __u32 *b)
{
    // Read both values first, then write them back crosswise
    const __u32 first = *a;
    const __u32 second = *b;

    *a = second;
    *b = first;
}

// XDP entry point: answers IPv4 ICMP Echo Requests directly at the XDP hook.
//
// Returns:
//   XDP_PASS    - the frame is not an unfragmented, option-less IPv4 ICMP
//                 Echo Request; it is left untouched for the kernel stack.
//   XDP_TX      - the frame was rewritten into an Echo Reply and is sent
//                 back out.
//   XDP_ABORTED - the checksum helper failed after the frame had already
//                 been modified; the frame must not be passed on or sent.
SEC("xdp")
int xdp_echo_reply(struct xdp_md *ctx)
{
    void *data_end = (void *)(unsigned long)ctx->data_end;
    void *data = (void *)(unsigned long)ctx->data;

    // Get pointer to Ethernet header (bounds check required by the verifier)
    struct ethhdr *eth = data;
    if ((void *)eth + sizeof(*eth) > data_end)
        return XDP_PASS;

    // Check if the Ethernet frame contains an IP packet
    if (eth->h_proto != htons(ETH_P_IP))
        return XDP_PASS;

    // Get pointer to IP header
    struct iphdr *ip = data + sizeof(*eth);
    if ((void *)ip + sizeof(*ip) > data_end)
        return XDP_PASS;

    // The ICMP header is located at a fixed sizeof(*ip) offset below.
    // That is only correct when the IP header has no options (IHL == 5
    // 32-bit words); anything else is left to the kernel stack.
    if (ip->ihl != 5)
        return XDP_PASS;

    // Fragments must be reassembled before they can be answered: only the
    // first fragment carries the ICMP header. Pass them to the kernel.
    // 0x2000 is the "more fragments" flag, 0x1fff the fragment offset mask.
    if (ip->frag_off & htons(0x2000 | 0x1fff))
        return XDP_PASS;

    // Check if the protocol is ICMP
    if (ip->protocol != IPPROTO_ICMP)
        return XDP_PASS;

    // Get pointer to ICMP header
    struct icmphdr *icmp = (void *)ip + sizeof(*ip);
    if ((void *)icmp + sizeof(*icmp) > data_end)
        return XDP_PASS;

    // Pass packets except ICMP Echo Requests
    if (icmp->type != ICMP_ECHO)
        return XDP_PASS;

    // Swap source and destination MAC addresses
    swap_eth_addr(eth->h_dest, eth->h_source);
    // Swap source and destination IP addresses. The IP header checksum is
    // left as is: swapping the addresses only reorders the 16-bit words
    // being summed.
    swap_ip_addr(&ip->saddr, &ip->daddr);

    // Keep a copy of the original ICMP header (including its checksum)
    struct icmphdr icmp_before = *icmp;

    // Change type to ICMP Echo Reply
    icmp->type = ICMP_ECHOREPLY;

    // Zero the checksum field, then take the difference between the old
    // header (old checksum included) and the new header (checksum zero).
    // Folding and complementing that difference gives the new checksum.
    icmp->checksum = 0;
    __s64 value = bpf_csum_diff((void *)&icmp_before, sizeof(icmp_before), (void *)icmp, sizeof(*icmp), 0);
    // A negative value is an error from the helper. The frame is already
    // rewritten and has no valid checksum, so drop it instead of sending it.
    if (value < 0)
        return XDP_ABORTED;
    icmp->checksum = csum_fold(value);

    // Send the rewritten frame back out
    return XDP_TX;
}

// License declaration for this program, placed via SEC("license").
char _license[] SEC("license") = "MIT";
// Fold a 32-bit one's-complement accumulator down to 16 bits and return
// the bitwise complement of the folded value (the on-wire checksum).
static __always_inline __u16 csum_fold(__u32 csum)
{
    // First pass: high half + low half (may itself carry into bit 16)
    __u32 folded = (csum >> 16) + (csum & 0xffff);
    // Second pass: add that carry back in; bits above 15 are discarded
    // by the narrowing cast below
    folded += folded >> 16;
    // One's complement of the low 16 bits is the final checksum
    return (__u16)~folded;
}

The csum_fold function compresses a 32-bit checksum into a 16-bit checksum. The return value of bpf_csum_diff() is 32 bits and can produce carry. Therefore, we first add the upper 16 bits to the lower 16 bits, and perform carry processing again. Finally, we take the one’s complement (bit inversion) to obtain the final checksum value.

// Exchange two ETH_ALEN-byte MAC addresses in place.
// a and b are expected to point at distinct, non-overlapping buffers.
static __always_inline void swap_eth_addr(__u8 *a, __u8 *b)
{
    __u8 saved[ETH_ALEN];

    // Park b, overwrite it with a, then restore the parked bytes into a
    __builtin_memcpy(saved, b, ETH_ALEN);
    __builtin_memcpy(b, a, ETH_ALEN);
    __builtin_memcpy(a, saved, ETH_ALEN);
}

The swap_eth_addr function swaps the MAC addresses in the Ethernet header.

// Exchange the two 32-bit IPv4 addresses that a and b point to.
static __always_inline void swap_ip_addr(__u32 *a, __u32 *b)
{
    // Read both values first, then write them back crosswise
    const __u32 first = *a;
    const __u32 second = *b;

    *a = second;
    *b = first;
}

The swap_ip_addr function swaps the IP addresses in the IP header.

eBPF programs have traditionally required helper functions to be expanded inline (BPF-to-BPF function calls are only available on newer kernels), which is why the __always_inline directive is added.

    // Swap source and destination MAC addresses
    swap_eth_addr(eth->h_dest, eth->h_source);
    // Swap source and destination IP addresses
    swap_ip_addr(&ip->saddr, &ip->daddr);

    // Keep a copy of the original ICMP header (including its checksum)
    struct icmphdr icmp_before = *icmp;

    // Change type to ICMP Echo Reply
    icmp->type = ICMP_ECHOREPLY;

    // Zero the checksum field, then take the difference between the old
    // header (old checksum included) and the new header (checksum zero)
    icmp->checksum = 0;
    __s64 value = bpf_csum_diff((void *)&icmp_before, sizeof(icmp_before), (void *)icmp, sizeof(*icmp), 0);
    // A negative value is an error from the helper. The frame is already
    // rewritten and has no valid checksum, so drop it instead of sending it.
    if (value < 0)
        return XDP_ABORTED;
    icmp->checksum = csum_fold(value);

This part differs from before.
The swapping of MAC and IP addresses is encapsulated in functions swap_eth_addr and swap_ip_addr.

We make a copy of icmp and change icmp->type to ICMP_ECHOREPLY.
Then we calculate the checksum.

We initialize icmp->checksum = 0; and then calculate the difference using bpf_csum_diff(). Since the return value of bpf_csum_diff does not handle carry, we fold it using csum_fold().

Operation Confirmation

  • Send ping from the client

    $ ping 192.168.XXX.XXX
    
  • Confirm that an ICMP Echo Reply is returned when ping is sent from the client.

    $ sudo tcpdump -i eth0 icmp -vvv
    XX:XX:XX.413478 IP (tos 0x0, ttl 64, id 35523, offset 0, flags [DF], proto ICMP (1), length 84)
        192.168.XXX.1 > 192.168.XXX.2: ICMP echo request, id 64346, seq 16, length 64
    XX:XX:XX.414359 IP (tos 0x0, ttl 64, id 35523, offset 0, flags [DF], proto ICMP (1), length 84)
        192.168.XXX.2 > 192.168.XXX.1: ICMP echo reply, id 64346, seq 16, length 64
    

The checksum error has been resolved, and we confirmed that the ICMP Echo Reply is returned successfully.

Actually…

Actually, there is also a checksum in the IP header, so it also needs to be recalculated.

  • IP Header Structure
    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    | Version |  IHL  |  DSCP  | ECN  |        Total Length         |
    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    |         Identification        |Flags|     Fragment Offset     |
    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    |  Time to Live |   Protocol    |        Header Checksum        |
    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    |                      Source IP Address                       |
    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    |                   Destination IP Address                     |
    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    |                    Options (if any)                          |
    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    

There is a field called Header Checksum, and strictly speaking we should recalculate this value.

However, all we do to the IP header is swap the Source IP and Destination IP. The checksum is the one’s complement of a sum of 16-bit words, and a sum does not depend on the order of its terms, so “the checksum does not change when IP addresses are swapped in 16-bit units.”
Therefore, this time it just happened to have no effect, but if other fields are changed (for example the TTL), it is necessary to recalculate the IP checksum.

When necessary, let’s recalculate the IP header checksum in the same way as we did for the ICMP header.

Calculate the Checksum for IP Header as Well

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/icmp.h>
#include <arpa/inet.h>

// Fold a 32-bit one's-complement accumulator down to 16 bits and return
// the bitwise complement of the folded value (the on-wire checksum).
static __always_inline __u16 csum_fold(__u32 csum)
{
    // First pass: high half + low half (may itself carry into bit 16)
    __u32 folded = (csum >> 16) + (csum & 0xffff);
    // Second pass: add that carry back in; bits above 15 are discarded
    // by the narrowing cast below
    folded += folded >> 16;
    // One's complement of the low 16 bits is the final checksum
    return (__u16)~folded;
}

// Exchange two ETH_ALEN-byte MAC addresses in place.
// a and b are expected to point at distinct, non-overlapping buffers.
static __always_inline void swap_eth_addr(__u8 *a, __u8 *b)
{
    __u8 saved[ETH_ALEN];

    // Park b, overwrite it with a, then restore the parked bytes into a
    __builtin_memcpy(saved, b, ETH_ALEN);
    __builtin_memcpy(b, a, ETH_ALEN);
    __builtin_memcpy(a, saved, ETH_ALEN);
}

// Exchange the two 32-bit IPv4 addresses that a and b point to.
static __always_inline void swap_ip_addr(__u32 *a, __u32 *b)
{
    // Read both values first, then write them back crosswise
    const __u32 first = *a;
    const __u32 second = *b;

    *a = second;
    *b = first;
}

// XDP entry point: answers IPv4 ICMP Echo Requests directly at the XDP hook,
// updating both the ICMP checksum and the IP header checksum.
//
// Returns:
//   XDP_PASS    - the frame is not an unfragmented, option-less IPv4 ICMP
//                 Echo Request; it is left untouched for the kernel stack.
//   XDP_TX      - the frame was rewritten into an Echo Reply and is sent
//                 back out.
//   XDP_ABORTED - a checksum helper call failed after the frame had already
//                 been modified; the frame must not be passed on or sent.
SEC("xdp")
int xdp_echo_reply(struct xdp_md *ctx)
{
    void *data_end = (void *)(unsigned long)ctx->data_end;
    void *data = (void *)(unsigned long)ctx->data;

    // Get pointer to Ethernet header (bounds check required by the verifier)
    struct ethhdr *eth = data;
    if ((void *)eth + sizeof(*eth) > data_end)
        return XDP_PASS;

    // Check if the Ethernet frame contains an IP packet
    if (eth->h_proto != htons(ETH_P_IP))
        return XDP_PASS;

    // Get pointer to IP header
    struct iphdr *ip = data + sizeof(*eth);
    if ((void *)ip + sizeof(*ip) > data_end)
        return XDP_PASS;

    // The ICMP header is located at a fixed sizeof(*ip) offset below.
    // That is only correct when the IP header has no options (IHL == 5
    // 32-bit words); anything else is left to the kernel stack.
    if (ip->ihl != 5)
        return XDP_PASS;

    // Fragments must be reassembled before they can be answered: only the
    // first fragment carries the ICMP header. Pass them to the kernel.
    // 0x2000 is the "more fragments" flag, 0x1fff the fragment offset mask.
    if (ip->frag_off & htons(0x2000 | 0x1fff))
        return XDP_PASS;

    // Check if the protocol is ICMP
    if (ip->protocol != IPPROTO_ICMP)
        return XDP_PASS;

    // Get pointer to ICMP header
    struct icmphdr *icmp = (void *)ip + sizeof(*ip);
    if ((void *)icmp + sizeof(*icmp) > data_end)
        return XDP_PASS;

    // Pass packets except ICMP Echo Requests
    if (icmp->type != ICMP_ECHO)
        return XDP_PASS;

    // Keep a copy of the original IP header (including its checksum)
    // before any field is rewritten
    struct iphdr ip_before = *ip;

    // Swap source and destination MAC addresses
    swap_eth_addr(eth->h_dest, eth->h_source);
    // Swap source and destination IP addresses
    swap_ip_addr(&ip->saddr, &ip->daddr);

    // Keep a copy of the original ICMP header (including its checksum)
    struct icmphdr icmp_before = *icmp;

    // Change type to ICMP Echo Reply
    icmp->type = ICMP_ECHOREPLY;

    // ICMP checksum: zero the field, then take the difference between the
    // old header (old checksum included) and the new header (checksum
    // zero). Folding and complementing that difference gives the new value.
    icmp->checksum = 0;
    __s64 value = bpf_csum_diff((void *)&icmp_before, sizeof(icmp_before), (void *)icmp, sizeof(*icmp), 0);
    // A negative value is an error from the helper. The frame is already
    // rewritten and has no valid checksum, so drop it instead of sending it.
    if (value < 0)
        return XDP_ABORTED;
    icmp->checksum = csum_fold(value);

    // IP header checksum: same procedure over the fixed 20-byte header
    ip->check = 0;
    value = bpf_csum_diff((void *)&ip_before, sizeof(ip_before), (void *)ip, sizeof(*ip), 0);
    if (value < 0)
        return XDP_ABORTED;
    ip->check = csum_fold(value);

    // Send the rewritten frame back out
    return XDP_TX;
}

// License declaration for this program, placed via SEC("license").
char _license[] SEC("license") = "MIT";

Before rewriting the IP header in swap_ip_addr, we make a copy and calculate the difference using bpf_csum_diff, just like we did for the ICMP checksum. Now, the IP header checksum will also be calculated correctly.

Summary

  • Implemented an XDP program to return ICMP Echo Replies in response to ICMP Echo Requests.
  • Introduced the method to correctly calculate the ICMP checksum and return it.
  • The checksum calculation only requires obtaining the difference in the modified areas in 16-bit units, without needing to recalculate all data.
  • Using the function bpf_csum_diff(), it becomes easy to perform checksum calculations.