// SPDX-License-Identifier: GPL-2.0 /* In-place tunneling */ #include <stdbool.h> #include <string.h> #include <linux/stddef.h> #include <linux/bpf.h> #include <linux/if_ether.h> #include <linux/in.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/mpls.h> #include <linux/tcp.h> #include <linux/udp.h> #include <linux/pkt_cls.h> #include <linux/types.h> #include <bpf/bpf_endian.h> #include <bpf/bpf_helpers.h> static const int cfg_port = 8000; static const int cfg_udp_src = 20000; #define L2_PAD_SZ (sizeof(struct vxlanhdr) + ETH_HLEN) #define UDP_PORT 5555 #define MPLS_OVER_UDP_PORT 6635 #define ETH_OVER_UDP_PORT 7777 #define VXLAN_UDP_PORT 8472 #define EXTPROTO_VXLAN 0x1 #define VXLAN_N_VID (1u << 24) #define VXLAN_VNI_MASK bpf_htonl((VXLAN_N_VID - 1) << 8) #define VXLAN_FLAGS 0x8 #define VXLAN_VNI 1 /* MPLS label 1000 with S bit (last label) set and ttl of 255. */ static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 | MPLS_LS_S_MASK | 0xff); struct vxlanhdr { __be32 vx_flags; __be32 vx_vni; } __attribute__((packed)); struct gre_hdr { __be16 flags; __be16 protocol; } __attribute__((packed)); union l4hdr { struct udphdr udp; struct gre_hdr gre; }; struct v4hdr { struct iphdr ip; union l4hdr l4hdr; __u8 pad[L2_PAD_SZ]; /* space for L2 header / vxlan header ... */ } __attribute__((packed)); struct v6hdr { struct ipv6hdr ip; union l4hdr l4hdr; __u8 pad[L2_PAD_SZ]; /* space for L2 header / vxlan header ... */ } __attribute__((packed)); static __always_inline void set_ipv4_csum(struct iphdr *iph) { __u16 *iph16 = (__u16 *)iph; __u32 csum; int i; iph->check = 0; #pragma clang loop unroll(full) for (i = 0, csum = 0; i < sizeof(*iph) >> 1; i++) csum += *iph16++; iph->check = ~((csum & 0xffff) + (csum >> 16)); } static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 encap_proto, __u16 l2_proto, __u16 ext_proto) { __u16 udp_dst = UDP_PORT; struct iphdr iph_inner; struct v4hdr h_outer; struct tcphdr tcph; int olen, l2_len; __u8 *l2_hdr = NULL; int tcp_off; __u64 flags; /* Most tests encapsulate a packet into a tunnel with the same * network protocol, and derive the outer header fields from * the inner header. * * The 6in4 case tests different inner and outer protocols. As * the inner is ipv6, but the outer expects an ipv4 header as * input, manually build a struct iphdr based on the ipv6hdr. */ if (encap_proto == IPPROTO_IPV6) { const __u32 saddr = (192 << 24) | (168 << 16) | (1 << 8) | 1; const __u32 daddr = (192 << 24) | (168 << 16) | (1 << 8) | 2; struct ipv6hdr iph6_inner; /* Read the IPv6 header */ if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph6_inner, sizeof(iph6_inner)) < 0) return TC_ACT_OK; /* Derive the IPv4 header fields from the IPv6 header */ memset(&iph_inner, 0, sizeof(iph_inner)); iph_inner.version = 4; iph_inner.ihl = 5; iph_inner.tot_len = bpf_htons(sizeof(iph6_inner) + bpf_ntohs(iph6_inner.payload_len)); iph_inner.ttl = iph6_inner.hop_limit - 1; iph_inner.protocol = iph6_inner.nexthdr; iph_inner.saddr = __bpf_constant_htonl(saddr); iph_inner.daddr = __bpf_constant_htonl(daddr); tcp_off = sizeof(iph6_inner); } else { if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner, sizeof(iph_inner)) < 0) return TC_ACT_OK; tcp_off = sizeof(iph_inner); } /* filter only packets we want */ if (iph_inner.ihl != 5 || iph_inner.protocol != IPPROTO_TCP) return TC_ACT_OK; if (bpf_skb_load_bytes(skb, ETH_HLEN + tcp_off, &tcph, sizeof(tcph)) < 0) return TC_ACT_OK; if (tcph.dest != __bpf_constant_htons(cfg_port)) return TC_ACT_OK; olen = sizeof(h_outer.ip); l2_len = 0; flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV4; switch (l2_proto) { case ETH_P_MPLS_UC: l2_len = sizeof(mpls_label); udp_dst = MPLS_OVER_UDP_PORT; break; case ETH_P_TEB: l2_len = ETH_HLEN; if (ext_proto & EXTPROTO_VXLAN) { udp_dst = VXLAN_UDP_PORT; l2_len += sizeof(struct vxlanhdr); } else udp_dst = ETH_OVER_UDP_PORT; break; } flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len); switch (encap_proto) { case IPPROTO_GRE: flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE; olen += sizeof(h_outer.l4hdr.gre); h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto); h_outer.l4hdr.gre.flags = 0; break; case IPPROTO_UDP: flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP; olen += sizeof(h_outer.l4hdr.udp); h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src); h_outer.l4hdr.udp.dest = bpf_htons(udp_dst); h_outer.l4hdr.udp.check = 0; h_outer.l4hdr.udp.len = bpf_htons(bpf_ntohs(iph_inner.tot_len) + sizeof(h_outer.l4hdr.udp) + l2_len); break; case IPPROTO_IPIP: case IPPROTO_IPV6: break; default: return TC_ACT_OK; } /* add L2 encap (if specified) */ l2_hdr = (__u8 *)&h_outer + olen; switch (l2_proto) { case ETH_P_MPLS_UC: *(__u32 *)l2_hdr = mpls_label; break; case ETH_P_TEB: flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH; if (ext_proto & EXTPROTO_VXLAN) { struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr; vxlan_hdr->vx_flags = VXLAN_FLAGS; vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8); l2_hdr += sizeof(struct vxlanhdr); } if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN)) return TC_ACT_SHOT; break; } olen += l2_len; /* add room between mac and network header */ if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags)) return TC_ACT_SHOT; /* prepare new outer network header */ h_outer.ip = iph_inner; h_outer.ip.tot_len = bpf_htons(olen + bpf_ntohs(h_outer.ip.tot_len)); h_outer.ip.protocol = encap_proto; set_ipv4_csum((void *)&h_outer.ip); /* store new outer network header */ if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen, BPF_F_INVALIDATE_HASH) < 0) return TC_ACT_SHOT; /* if changing outer proto type, update eth->h_proto */ if (encap_proto == IPPROTO_IPV6) { struct ethhdr eth; if (bpf_skb_load_bytes(skb, 0, ð, sizeof(eth)) < 0) return TC_ACT_SHOT; eth.h_proto = bpf_htons(ETH_P_IP); if (bpf_skb_store_bytes(skb, 0, ð, sizeof(eth), 0) < 0) return TC_ACT_SHOT; } return TC_ACT_OK; } static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto, __u16 l2_proto) { return __encap_ipv4(skb, encap_proto, l2_proto, 0); } static __always_inline int __encap_ipv6(struct __sk_buff *skb, __u8 encap_proto, __u16 l2_proto, __u16 ext_proto) { __u16 udp_dst = UDP_PORT; struct ipv6hdr iph_inner; struct v6hdr h_outer; struct tcphdr tcph; int olen, l2_len; __u8 *l2_hdr = NULL; __u16 tot_len; __u64 flags; if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner, sizeof(iph_inner)) < 0) return TC_ACT_OK; /* filter only packets we want */ if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph_inner), &tcph, sizeof(tcph)) < 0) return TC_ACT_OK; if (tcph.dest != __bpf_constant_htons(cfg_port)) return TC_ACT_OK; olen = sizeof(h_outer.ip); l2_len = 0; flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6; switch (l2_proto) { case ETH_P_MPLS_UC: l2_len = sizeof(mpls_label); udp_dst = MPLS_OVER_UDP_PORT; break; case ETH_P_TEB: l2_len = ETH_HLEN; if (ext_proto & EXTPROTO_VXLAN) { udp_dst = VXLAN_UDP_PORT; l2_len += sizeof(struct vxlanhdr); } else udp_dst = ETH_OVER_UDP_PORT; break; } flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len); switch (encap_proto) { case IPPROTO_GRE: flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE; olen += sizeof(h_outer.l4hdr.gre); h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto); h_outer.l4hdr.gre.flags = 0; break; case IPPROTO_UDP: flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP; olen += sizeof(h_outer.l4hdr.udp); h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src); h_outer.l4hdr.udp.dest = bpf_htons(udp_dst); tot_len = bpf_ntohs(iph_inner.payload_len) + sizeof(iph_inner) + sizeof(h_outer.l4hdr.udp) + l2_len; h_outer.l4hdr.udp.check = 0; h_outer.l4hdr.udp.len = bpf_htons(tot_len); break; case IPPROTO_IPV6: break; default: return TC_ACT_OK; } /* add L2 encap (if specified) */ l2_hdr = (__u8 *)&h_outer + olen; switch (l2_proto) { case ETH_P_MPLS_UC: *(__u32 *)l2_hdr = mpls_label; break; case ETH_P_TEB: flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH; if (ext_proto & EXTPROTO_VXLAN) { struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr; vxlan_hdr->vx_flags = VXLAN_FLAGS; vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8); l2_hdr += sizeof(struct vxlanhdr); } if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN)) return TC_ACT_SHOT; break; } olen += l2_len; /* add room between mac and network header */ if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags)) return TC_ACT_SHOT; /* prepare new outer network header */ h_outer.ip = iph_inner; h_outer.ip.payload_len = bpf_htons(olen + bpf_ntohs(h_outer.ip.payload_len)); h_outer.ip.nexthdr = encap_proto; /* store new outer network header */ if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen, BPF_F_INVALIDATE_HASH) < 0) return TC_ACT_SHOT; return TC_ACT_OK; } static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto, __u16 l2_proto) { return __encap_ipv6(skb, encap_proto, l2_proto, 0); } SEC("encap_ipip_none") int __encap_ipip_none(struct __sk_buff *skb) { if (skb->protocol == __bpf_constant_htons(ETH_P_IP)) return encap_ipv4(skb, IPPROTO_IPIP, ETH_P_IP); else return TC_ACT_OK; } SEC("encap_gre_none") int __encap_gre_none(struct __sk_buff *skb) { if (skb->protocol == __bpf_constant_htons(ETH_P_IP)) return encap_ipv4(skb, IPPROTO_GRE, ETH_P_IP); else return TC_ACT_OK; } SEC("encap_gre_mpls") int __encap_gre_mpls(struct __sk_buff *skb) { if (skb->protocol == __bpf_constant_htons(ETH_P_IP)) return encap_ipv4(skb, IPPROTO_GRE, ETH_P_MPLS_UC); else return TC_ACT_OK; } SEC("encap_gre_eth") int __encap_gre_eth(struct __sk_buff *skb) { if (skb->protocol == __bpf_constant_htons(ETH_P_IP)) return encap_ipv4(skb, IPPROTO_GRE, ETH_P_TEB); else return TC_ACT_OK; } SEC("encap_udp_none") int __encap_udp_none(struct __sk_buff *skb) { if (skb->protocol == __bpf_constant_htons(ETH_P_IP)) return encap_ipv4(skb, IPPROTO_UDP, ETH_P_IP); else return TC_ACT_OK; } SEC("encap_udp_mpls") int __encap_udp_mpls(struct __sk_buff *skb) { if (skb->protocol == __bpf_constant_htons(ETH_P_IP)) return encap_ipv4(skb, IPPROTO_UDP, ETH_P_MPLS_UC); else return TC_ACT_OK; } SEC("encap_udp_eth") int __encap_udp_eth(struct __sk_buff *skb) { if (skb->protocol == __bpf_constant_htons(ETH_P_IP)) return encap_ipv4(skb, IPPROTO_UDP, ETH_P_TEB); else return TC_ACT_OK; } SEC("encap_vxlan_eth") int __encap_vxlan_eth(struct __sk_buff *skb) { if (skb->protocol == __bpf_constant_htons(ETH_P_IP)) return __encap_ipv4(skb, IPPROTO_UDP, ETH_P_TEB, EXTPROTO_VXLAN); else return TC_ACT_OK; } SEC("encap_sit_none") int __encap_sit_none(struct __sk_buff *skb) { if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) return encap_ipv4(skb, IPPROTO_IPV6, ETH_P_IP); else return TC_ACT_OK; } SEC("encap_ip6tnl_none") int __encap_ip6tnl_none(struct __sk_buff *skb) { if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) return encap_ipv6(skb, IPPROTO_IPV6, ETH_P_IPV6); else return TC_ACT_OK; } SEC("encap_ip6gre_none") int __encap_ip6gre_none(struct __sk_buff *skb) { if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) return encap_ipv6(skb, IPPROTO_GRE, ETH_P_IPV6); else return TC_ACT_OK; } SEC("encap_ip6gre_mpls") int __encap_ip6gre_mpls(struct __sk_buff *skb) { if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) return encap_ipv6(skb, IPPROTO_GRE, ETH_P_MPLS_UC); else return TC_ACT_OK; } SEC("encap_ip6gre_eth") int __encap_ip6gre_eth(struct __sk_buff *skb) { if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) return encap_ipv6(skb, IPPROTO_GRE, ETH_P_TEB); else return TC_ACT_OK; } SEC("encap_ip6udp_none") int __encap_ip6udp_none(struct __sk_buff *skb) { if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) return encap_ipv6(skb, IPPROTO_UDP, ETH_P_IPV6); else return TC_ACT_OK; } SEC("encap_ip6udp_mpls") int __encap_ip6udp_mpls(struct __sk_buff *skb) { if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) return encap_ipv6(skb, IPPROTO_UDP, ETH_P_MPLS_UC); else return TC_ACT_OK; } SEC("encap_ip6udp_eth") int __encap_ip6udp_eth(struct __sk_buff *skb) { if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) return encap_ipv6(skb, IPPROTO_UDP, ETH_P_TEB); else return TC_ACT_OK; } SEC("encap_ip6vxlan_eth") int __encap_ip6vxlan_eth(struct __sk_buff *skb) { if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) return __encap_ipv6(skb, IPPROTO_UDP, ETH_P_TEB, EXTPROTO_VXLAN); else return TC_ACT_OK; } static int decap_internal(struct __sk_buff *skb, int off, int len, char proto) { struct gre_hdr greh; struct udphdr udph; int olen = len; switch (proto) { case IPPROTO_IPIP: case IPPROTO_IPV6: break; case IPPROTO_GRE: olen += sizeof(struct gre_hdr); if (bpf_skb_load_bytes(skb, off + len, &greh, sizeof(greh)) < 0) return TC_ACT_OK; switch (bpf_ntohs(greh.protocol)) { case ETH_P_MPLS_UC: olen += sizeof(mpls_label); break; case ETH_P_TEB: olen += ETH_HLEN; break; } break; case IPPROTO_UDP: olen += sizeof(struct udphdr); if (bpf_skb_load_bytes(skb, off + len, &udph, sizeof(udph)) < 0) return TC_ACT_OK; switch (bpf_ntohs(udph.dest)) { case MPLS_OVER_UDP_PORT: olen += sizeof(mpls_label); break; case ETH_OVER_UDP_PORT: olen += ETH_HLEN; break; case VXLAN_UDP_PORT: olen += ETH_HLEN + sizeof(struct vxlanhdr); break; } break; default: return TC_ACT_OK; } if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC, BPF_F_ADJ_ROOM_FIXED_GSO)) return TC_ACT_SHOT; return TC_ACT_OK; } static int decap_ipv4(struct __sk_buff *skb) { struct iphdr iph_outer; if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer, sizeof(iph_outer)) < 0) return TC_ACT_OK; if (iph_outer.ihl != 5) return TC_ACT_OK; return decap_internal(skb, ETH_HLEN, sizeof(iph_outer), iph_outer.protocol); } static int decap_ipv6(struct __sk_buff *skb) { struct ipv6hdr iph_outer; if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer, sizeof(iph_outer)) < 0) return TC_ACT_OK; return decap_internal(skb, ETH_HLEN, sizeof(iph_outer), iph_outer.nexthdr); } SEC("decap") int decap_f(struct __sk_buff *skb) { switch (skb->protocol) { case __bpf_constant_htons(ETH_P_IP): return decap_ipv4(skb); case __bpf_constant_htons(ETH_P_IPV6): return decap_ipv6(skb); default: /* does not match, ignore */ return TC_ACT_OK; } } char __license[] SEC("license") = "GPL";