In the last article, I wrote a program to return ICMP Echo Replies using XDP.
However, it turned out that the ICMP checksum in those replies was not calculated correctly.
This time, I would like to describe how to correctly calculate and return the ICMP checksum.
Please praise me for writing for two consecutive days without being lazy.
ICMP Checksum
- ICMP Header Structure
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|     Type      |     Code      |          Checksum             |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|           Identifier          |        Sequence Number        |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|                             Data                              |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
The ICMP header includes a Checksum, and since we changed the Type from ICMP Echo Request to ICMP Echo Reply and returned the packet, we need to recalculate the value of the Checksum.
The ICMP checksum is calculated by summing the header and all of the data as 16-bit words using one's-complement addition: whenever the sum overflows 16 bits, the carry is added back into the low 16 bits. Finally, taking the one's complement (bit inversion) of that sum yields the checksum.
The checksum algorithm itself is described in RFC 1071, and the method for updating a checksum incrementally is detailed in RFC 1624. Be careful when referring to RFC 1141, as its description of the difference calculation is incorrect and has been corrected in RFC 1624.
I’ve had experiences where I wasted time looking at RFC 1141 without realizing it had been updated.
RFCs are not translated by someone, so always read the original text… (A reminder to myself)
Since the values are added in 16-bit units and the one's complement is taken at the end, we only need to obtain the difference, in 16-bit units, for the areas that were modified; there is no need to recalculate the checksum over all of the data.
bpf_csum_diff
When calculating checksums in eBPF programs, a helper function called bpf_csum_diff is available. This function makes it easy to perform checksum calculations.
bpf_csum_diff computes the checksum difference between an old ("from") buffer and a new ("to") buffer; the size of each buffer must be a multiple of 4 bytes. This allows for efficient updating of ICMP and IP checksums.
Program to Return ICMP Echo Reply
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/icmp.h>
#include <arpa/inet.h>
/*
 * Fold a 32-bit one's-complement partial sum into the final 16-bit checksum.
 *
 * csum: 32-bit accumulated sum (for example the value from bpf_csum_diff()).
 * Returns the bitwise complement of the folded 16-bit sum.
 */
static __always_inline __u16 csum_fold(__u32 csum)
{
    /* First pass: add the carries held in the high half into the low half. */
    __u32 folded = (csum >> 16) + (csum & 0xffff);

    /* Second pass: the first addition may itself have carried into bit 16. */
    folded = (folded >> 16) + (folded & 0xffff);

    /* Invert the bits and truncate to 16 bits. */
    return (__u16)~folded;
}
/*
 * Exchange two ETH_ALEN-byte MAC addresses in place.
 *
 * a, b: pointers to the two address buffers whose contents are swapped.
 */
static __always_inline void swap_eth_addr(__u8 *a, __u8 *b)
{
    __u8 saved[ETH_ALEN];

    /* Park b's bytes, overwrite b with a, then restore the parked bytes into a. */
    __builtin_memcpy(saved, b, ETH_ALEN);
    __builtin_memcpy(b, a, ETH_ALEN);
    __builtin_memcpy(a, saved, ETH_ALEN);
}
/*
 * Exchange two 32-bit IPv4 addresses in place.
 *
 * a, b: pointers to the two address fields whose values are swapped.
 */
static __always_inline void swap_ip_addr(__u32 *a, __u32 *b)
{
    /* Read both values before writing either one back. */
    const __u32 first = *a;
    const __u32 second = *b;

    *a = second;
    *b = first;
}
/*
 * XDP entry point: answer IPv4 ICMP Echo Requests directly from the XDP hook.
 *
 * Anything that is not a plain (option-less, unfragmented) IPv4 ICMP Echo
 * Request is handed to the normal network stack with XDP_PASS. A matching
 * request is rewritten in place into an Echo Reply and sent back out of the
 * receiving interface with XDP_TX.
 *
 * ctx: XDP context carrying the packet's data / data_end offsets.
 * Returns XDP_PASS, XDP_TX, or XDP_ABORTED if the checksum helper fails.
 */
SEC("xdp")
int xdp_echo_reply(struct xdp_md *ctx)
{
    void *data_end = (void *)(unsigned long)ctx->data_end;
    void *data = (void *)(unsigned long)ctx->data;

    // Ethernet header: make sure it lies inside the packet before reading it
    struct ethhdr *eth = data;
    if ((void *)eth + sizeof(*eth) > data_end)
        return XDP_PASS;

    // Only IPv4 frames are handled here
    if (eth->h_proto != htons(ETH_P_IP))
        return XDP_PASS;

    // IPv4 header
    struct iphdr *ip = data + sizeof(*eth);
    if ((void *)ip + sizeof(*ip) > data_end)
        return XDP_PASS;

    // Only ICMP is handled here
    if (ip->protocol != IPPROTO_ICMP)
        return XDP_PASS;

    // The ICMP header is located at a fixed sizeof(struct iphdr) offset below,
    // which is only correct when the IP header carries no options (IHL == 5).
    // Packets with options are left to the kernel stack.
    if (ip->ihl != 5)
        return XDP_PASS;

    // Fragments (MF flag 0x2000 set, or a non-zero offset in 0x1fff) are left
    // to the kernel stack: a non-first fragment has no ICMP header at all.
    if (ip->frag_off & htons(0x3fff))
        return XDP_PASS;

    // ICMP header
    struct icmphdr *icmp = (void *)ip + sizeof(*ip);
    if ((void *)icmp + sizeof(*icmp) > data_end)
        return XDP_PASS;

    // Everything except Echo Requests goes to the kernel stack
    if (icmp->type != ICMP_ECHO)
        return XDP_PASS;

    // Turn the frame around: swap MAC addresses and IP addresses
    swap_eth_addr(eth->h_dest, eth->h_source);
    swap_ip_addr(&ip->saddr, &ip->daddr);

    // Keep the request's ICMP header, including its checksum, for the diff
    struct icmphdr icmp_before = *icmp;

    // Rewrite the header as an Echo Reply with a zeroed checksum field
    icmp->type = ICMP_ECHOREPLY;
    icmp->checksum = 0;

    // Diff from the saved header to the rewritten one. Because icmp_before
    // still contains the request's checksum (assumed valid), the result is the
    // one's-complement sum of the new header plus the untouched payload;
    // csum_fold() folds and complements it into the final checksum.
    __s64 value = bpf_csum_diff((void *)&icmp_before, sizeof(icmp_before), (void *)icmp, sizeof(*icmp), 0);

    // The packet is already modified, so it can neither be passed up nor sent
    // with a zero checksum: report the failure and drop it.
    if (value < 0)
        return XDP_ABORTED;
    icmp->checksum = csum_fold(value);

    // Send the reply back out of the interface it arrived on
    return XDP_TX;
}
// License string for this program, placed in the "license" section via SEC().
char _license[] SEC("license") = "MIT";
/*
 * Fold a 32-bit one's-complement partial sum into the final 16-bit checksum.
 *
 * csum: 32-bit accumulated sum (for example the value from bpf_csum_diff()).
 * Returns the bitwise complement of the folded 16-bit sum.
 */
static __always_inline __u16 csum_fold(__u32 csum)
{
    /* First pass: add the carries held in the high half into the low half. */
    __u32 folded = (csum >> 16) + (csum & 0xffff);

    /* Second pass: the first addition may itself have carried into bit 16. */
    folded = (folded >> 16) + (folded & 0xffff);

    /* Invert the bits and truncate to 16 bits. */
    return (__u16)~folded;
}
The csum_fold function compresses a 32-bit checksum into a 16-bit checksum. The value returned by bpf_csum_diff() is a 32-bit sum whose upper 16 bits hold the carries that accumulated during the addition. Therefore, we first add the upper 16 bits to the lower 16 bits, and then repeat the step once more in case that addition itself produced a carry. Finally, we take the one’s complement (bit inversion) to obtain the final checksum value.
/*
 * Exchange two ETH_ALEN-byte MAC addresses in place.
 *
 * a, b: pointers to the two address buffers whose contents are swapped.
 */
static __always_inline void swap_eth_addr(__u8 *a, __u8 *b)
{
    __u8 saved[ETH_ALEN];

    /* Park b's bytes, overwrite b with a, then restore the parked bytes into a. */
    __builtin_memcpy(saved, b, ETH_ALEN);
    __builtin_memcpy(b, a, ETH_ALEN);
    __builtin_memcpy(a, saved, ETH_ALEN);
}
The swap_eth_addr
function swaps the MAC addresses in the Ethernet header.
/*
 * Exchange two 32-bit IPv4 addresses in place.
 *
 * a, b: pointers to the two address fields whose values are swapped.
 */
static __always_inline void swap_ip_addr(__u32 *a, __u32 *b)
{
    /* Read both values before writing either one back. */
    const __u32 first = *a;
    const __u32 second = *b;

    *a = second;
    *b = first;
}
The swap_ip_addr
function swaps the IP addresses in the IP header.
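In eBPF, helper functions like these are conventionally forced inline: older kernels do not support function calls inside an eBPF program at all (BPF-to-BPF calls were only added in Linux 4.16), which is why the __always_inline directive is added.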
// Turn the frame around: swap source and destination MAC addresses
swap_eth_addr(eth->h_dest, eth->h_source);
// Swap source and destination IP addresses
swap_ip_addr(&ip->saddr, &ip->daddr);
// Save the ICMP header as received (type and checksum still unmodified)
struct icmphdr icmp_before = *icmp;
// Change type to ICMP Echo Reply
icmp->type = ICMP_ECHOREPLY;
// Zero the checksum field so the rewritten header is summed without it
icmp->checksum = 0;
// Checksum difference from the saved header to the rewritten one, seed 0
__s64 value = bpf_csum_diff((void *)&icmp_before, sizeof(icmp_before), (void *)icmp, sizeof(*icmp), 0);
// Only a non-negative result is folded and stored; otherwise the field stays 0
if (value >= 0)
icmp->checksum = csum_fold(value);
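This part differs from before.
The swapping of MAC and IP addresses is encapsulated in the functions swap_eth_addr and swap_ip_addr.
We make a copy of icmp and change icmp->type to ICMP_ECHOREPLY.
Then we calculate the checksum.
We set icmp->checksum = 0; and then calculate the difference between the saved copy and the rewritten header using bpf_csum_diff(). Because the saved copy still contains the checksum of the request, this difference equals the one's-complement sum of the new header plus the unchanged data. The value returned by bpf_csum_diff has not had its carries folded in and has not been inverted, so we finish by passing it through csum_fold().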
Operation Confirmation
Send ping from the client
$ ping 192.168.XXX.XXX
Confirm that an ICMP Echo Reply is returned when ping is sent from the client.
$ sudo tcpdump -i eth0 icmp -vvv
XX:XX:XX.413478 IP (tos 0x0, ttl 64, id 35523, offset 0, flags [DF], proto ICMP (1), length 84)
    192.168.XXX.1 > 192.168.XXX.2: ICMP echo request, id 64346, seq 16, length 64
XX:XX:XX.414359 IP (tos 0x0, ttl 64, id 35523, offset 0, flags [DF], proto ICMP (1), length 84)
    192.168.XXX.2 > 192.168.XXX.1: ICMP echo reply, id 64346, seq 16, length 64
The checksum error has been resolved, and we confirmed that the ICMP Echo Reply is returned successfully.
Actually…
Actually, there is also a checksum in the IP header, so it also needs to be recalculated.
- IP Header Structure
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|Version|  IHL  |   DSCP    |ECN|          Total Length         |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|         Identification        |Flags|     Fragment Offset     |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|  Time to Live |    Protocol   |        Header Checksum        |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|                       Source IP Address                       |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|                    Destination IP Address                     |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|                       Options (if any)                        |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
There is a field called Header Checksum, and strictly speaking we should recalculate this value.
However, since we are just swapping the Source IP and Destination IP in 16-bit units, the nature of checksum calculation means that the checksum result does not change.
This is due to the property that “the checksum as a one’s complement of the sum does not change when IP addresses are swapped in 16-bit units.” Therefore, this time it just happened to have no effect, but if other fields are changed, it is necessary to recalculate the IP checksum.
When that is necessary, let’s recalculate the IP header checksum in the same way as we did for the ICMP header.
Calculate the Checksum for IP Header as Well
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/icmp.h>
#include <arpa/inet.h>
/*
 * Fold a 32-bit one's-complement partial sum into the final 16-bit checksum.
 *
 * csum: 32-bit accumulated sum (for example the value from bpf_csum_diff()).
 * Returns the bitwise complement of the folded 16-bit sum.
 */
static __always_inline __u16 csum_fold(__u32 csum)
{
    /* First pass: add the carries held in the high half into the low half. */
    __u32 folded = (csum >> 16) + (csum & 0xffff);

    /* Second pass: the first addition may itself have carried into bit 16. */
    folded = (folded >> 16) + (folded & 0xffff);

    /* Invert the bits and truncate to 16 bits. */
    return (__u16)~folded;
}
/*
 * Exchange two ETH_ALEN-byte MAC addresses in place.
 *
 * a, b: pointers to the two address buffers whose contents are swapped.
 */
static __always_inline void swap_eth_addr(__u8 *a, __u8 *b)
{
    __u8 saved[ETH_ALEN];

    /* Park b's bytes, overwrite b with a, then restore the parked bytes into a. */
    __builtin_memcpy(saved, b, ETH_ALEN);
    __builtin_memcpy(b, a, ETH_ALEN);
    __builtin_memcpy(a, saved, ETH_ALEN);
}
/*
 * Exchange two 32-bit IPv4 addresses in place.
 *
 * a, b: pointers to the two address fields whose values are swapped.
 */
static __always_inline void swap_ip_addr(__u32 *a, __u32 *b)
{
    /* Read both values before writing either one back. */
    const __u32 first = *a;
    const __u32 second = *b;

    *a = second;
    *b = first;
}
/*
 * XDP entry point: answer IPv4 ICMP Echo Requests directly from the XDP hook,
 * updating both the ICMP checksum and the IP header checksum.
 *
 * Anything that is not a plain (option-less, unfragmented) IPv4 ICMP Echo
 * Request is handed to the normal network stack with XDP_PASS. A matching
 * request is rewritten in place into an Echo Reply and sent back out of the
 * receiving interface with XDP_TX.
 *
 * ctx: XDP context carrying the packet's data / data_end offsets.
 * Returns XDP_PASS, XDP_TX, or XDP_ABORTED if the checksum helper fails.
 */
SEC("xdp")
int xdp_echo_reply(struct xdp_md *ctx)
{
    void *data_end = (void *)(unsigned long)ctx->data_end;
    void *data = (void *)(unsigned long)ctx->data;

    // Ethernet header: make sure it lies inside the packet before reading it
    struct ethhdr *eth = data;
    if ((void *)eth + sizeof(*eth) > data_end)
        return XDP_PASS;

    // Only IPv4 frames are handled here
    if (eth->h_proto != htons(ETH_P_IP))
        return XDP_PASS;

    // IPv4 header
    struct iphdr *ip = data + sizeof(*eth);
    if ((void *)ip + sizeof(*ip) > data_end)
        return XDP_PASS;

    // Only ICMP is handled here
    if (ip->protocol != IPPROTO_ICMP)
        return XDP_PASS;

    // Both the ICMP header offset and the IP checksum diff below assume a
    // 20-byte IP header, which is only correct without options (IHL == 5).
    // Packets with options are left to the kernel stack.
    if (ip->ihl != 5)
        return XDP_PASS;

    // Fragments (MF flag 0x2000 set, or a non-zero offset in 0x1fff) are left
    // to the kernel stack: a non-first fragment has no ICMP header at all.
    if (ip->frag_off & htons(0x3fff))
        return XDP_PASS;

    // ICMP header
    struct icmphdr *icmp = (void *)ip + sizeof(*ip);
    if ((void *)icmp + sizeof(*icmp) > data_end)
        return XDP_PASS;

    // Everything except Echo Requests goes to the kernel stack
    if (icmp->type != ICMP_ECHO)
        return XDP_PASS;

    // Keep the IP header as received, including its checksum, for the diff
    struct iphdr ip_before = *ip;

    // Turn the frame around: swap MAC addresses and IP addresses
    swap_eth_addr(eth->h_dest, eth->h_source);
    swap_ip_addr(&ip->saddr, &ip->daddr);

    // Keep the request's ICMP header, including its checksum, for the diff
    struct icmphdr icmp_before = *icmp;

    // Rewrite the header as an Echo Reply with a zeroed checksum field
    icmp->type = ICMP_ECHOREPLY;
    icmp->checksum = 0;

    // Diff from the saved header to the rewritten one. Because icmp_before
    // still contains the request's checksum (assumed valid), the result is the
    // one's-complement sum of the new header plus the untouched payload;
    // csum_fold() folds and complements it into the final checksum.
    __s64 value = bpf_csum_diff((void *)&icmp_before, sizeof(icmp_before), (void *)icmp, sizeof(*icmp), 0);

    // The packet is already modified, so it can neither be passed up nor sent
    // with a zero checksum: report the failure and drop it.
    if (value < 0)
        return XDP_ABORTED;
    icmp->checksum = csum_fold(value);

    // IP header checksum: same zero-then-diff scheme against the saved header
    ip->check = 0;
    value = bpf_csum_diff((void *)&ip_before, sizeof(ip_before), (void *)ip, sizeof(*ip), 0);
    if (value < 0)
        return XDP_ABORTED;
    ip->check = csum_fold(value);

    // Send the reply back out of the interface it arrived on
    return XDP_TX;
}
// License string for this program, placed in the "license" section via SEC().
char _license[] SEC("license") = "MIT";
Before swap_ip_addr rewrites the IP header, we make a copy of it, and afterwards we calculate the difference using bpf_csum_diff, just like we did for the ICMP checksum. Now, the IP header checksum will also be calculated correctly.
Summary
- Implemented an XDP program to return ICMP Echo Replies in response to ICMP Echo Requests.
- Introduced the method to correctly calculate the ICMP checksum and return it.
- The checksum calculation only requires obtaining the difference in the modified areas in 16-bit units, without needing to recalculate all data.
- Using the function bpf_csum_diff(), it becomes easy to perform checksum calculations.