diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 910fb17ad148..22d54b9b700d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1886,6 +1886,9 @@ struct napi_gro_cb { /* Number of checksums via CHECKSUM_UNNECESSARY */ u8 csum_cnt:3; + /* Used in foo-over-udp, set in udp[46]_gro_receive */ + u8 is_ipv6:1; + /* used to support CHECKSUM_COMPLETE for tunneling protocols */ __wsum csum; diff --git a/include/net/gue.h b/include/net/gue.h new file mode 100644 index 000000000000..b6c332788084 --- /dev/null +++ b/include/net/gue.h @@ -0,0 +1,23 @@ +#ifndef __NET_GUE_H +#define __NET_GUE_H + +struct guehdr { + union { + struct { +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u8 hlen:4, + version:4; +#elif defined (__BIG_ENDIAN_BITFIELD) + __u8 version:4, + hlen:4; +#else +#error "Please fix " +#endif + __u8 next_hdr; + __u16 flags; + }; + __u32 word; + }; +}; + +#endif diff --git a/include/uapi/linux/fou.h b/include/uapi/linux/fou.h index e03376de453d..8df06894da23 100644 --- a/include/uapi/linux/fou.h +++ b/include/uapi/linux/fou.h @@ -13,6 +13,7 @@ enum { FOU_ATTR_PORT, /* u16 */ FOU_ATTR_AF, /* u8 */ FOU_ATTR_IPPROTO, /* u8 */ + FOU_ATTR_TYPE, /* u8 */ __FOU_ATTR_MAX, }; @@ -27,6 +28,12 @@ enum { __FOU_CMD_MAX, }; +enum { + FOU_ENCAP_UNSPEC, + FOU_ENCAP_DIRECT, + FOU_ENCAP_GUE, +}; + #define FOU_CMD_MAX (__FOU_CMD_MAX - 1) #endif /* _UAPI_LINUX_FOU_H */ diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index 7c832afdfa94..280d9e092283 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -64,6 +64,7 @@ enum { enum tunnel_encap_types { TUNNEL_ENCAP_NONE, TUNNEL_ENCAP_FOU, + TUNNEL_ENCAP_GUE, }; #define TUNNEL_ENCAP_FLAG_CSUM (1<<0) diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index dced89fbe480..efa70ad44906 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -27,6 +28,7 @@ struct fou { }; struct fou_cfg { + u16 type; u8 protocol; struct udp_port_cfg udp_config; }; @@ -64,15 +66,51 @@ static int fou_udp_recv(struct sock *sk, struct sk_buff *skb) sizeof(struct udphdr)); } +static int gue_udp_recv(struct sock *sk, struct sk_buff *skb) +{ + struct fou *fou = fou_from_sock(sk); + size_t len; + struct guehdr *guehdr; + struct udphdr *uh; + + if (!fou) + return 1; + + len = sizeof(struct udphdr) + sizeof(struct guehdr); + if (!pskb_may_pull(skb, len)) + goto drop; + + uh = udp_hdr(skb); + guehdr = (struct guehdr *)&uh[1]; + + len += guehdr->hlen << 2; + if (!pskb_may_pull(skb, len)) + goto drop; + + if (guehdr->version != 0) + goto drop; + + if (guehdr->flags) { + /* No support yet */ + goto drop; + } + + return fou_udp_encap_recv_deliver(skb, guehdr->next_hdr, len); +drop: + kfree_skb(skb); + return 0; +} + static struct sk_buff **fou_gro_receive(struct sk_buff **head, - struct sk_buff *skb, - const struct net_offload **offloads) + struct sk_buff *skb) { const struct net_offload *ops; struct sk_buff **pp = NULL; u8 proto = NAPI_GRO_CB(skb)->proto; + const struct net_offload **offloads; rcu_read_lock(); + offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; ops = rcu_dereference(offloads[proto]); if (!ops || !ops->callbacks.gro_receive) goto out_unlock; @@ -85,14 +123,15 @@ out_unlock: return pp; } -static int fou_gro_complete(struct sk_buff *skb, int nhoff, - const struct net_offload **offloads) +static int fou_gro_complete(struct sk_buff *skb, int nhoff) { const struct net_offload *ops; u8 proto = NAPI_GRO_CB(skb)->proto; int err = -ENOSYS; + const struct net_offload **offloads; rcu_read_lock(); + offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; ops = rcu_dereference(offloads[proto]); if (WARN_ON(!ops || !ops->callbacks.gro_complete)) goto out_unlock; @@ -105,26 +144,110 @@ out_unlock: return err; } -static struct sk_buff **fou4_gro_receive(struct sk_buff **head, - struct sk_buff *skb) +static struct sk_buff **gue_gro_receive(struct sk_buff **head, + struct sk_buff *skb) { - return fou_gro_receive(head, skb, inet_offloads); + const struct net_offload **offloads; + const struct net_offload *ops; + struct sk_buff **pp = NULL; + struct sk_buff *p; + u8 proto; + struct guehdr *guehdr; + unsigned int hlen, guehlen; + unsigned int off; + int flush = 1; + + off = skb_gro_offset(skb); + hlen = off + sizeof(*guehdr); + guehdr = skb_gro_header_fast(skb, off); + if (skb_gro_header_hard(skb, hlen)) { + guehdr = skb_gro_header_slow(skb, hlen, off); + if (unlikely(!guehdr)) + goto out; + } + + proto = guehdr->next_hdr; + + rcu_read_lock(); + offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; + ops = rcu_dereference(offloads[proto]); + if (WARN_ON(!ops || !ops->callbacks.gro_receive)) + goto out_unlock; + + guehlen = sizeof(*guehdr) + (guehdr->hlen << 2); + + hlen = off + guehlen; + if (skb_gro_header_hard(skb, hlen)) { + guehdr = skb_gro_header_slow(skb, hlen, off); + if (unlikely(!guehdr)) + goto out_unlock; + } + + flush = 0; + + for (p = *head; p; p = p->next) { + const struct guehdr *guehdr2; + + if (!NAPI_GRO_CB(p)->same_flow) + continue; + + guehdr2 = (struct guehdr *)(p->data + off); + + /* Compare base GUE header to be equal (covers + * hlen, version, next_hdr, and flags. + */ + if (guehdr->word != guehdr2->word) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + + /* Compare optional fields are the same. */ + if (guehdr->hlen && memcmp(&guehdr[1], &guehdr2[1], + guehdr->hlen << 2)) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + } + + skb_gro_pull(skb, guehlen); + + /* Adjusted NAPI_GRO_CB(skb)->csum after skb_gro_pull()*/ + skb_gro_postpull_rcsum(skb, guehdr, guehlen); + + pp = ops->callbacks.gro_receive(head, skb); + +out_unlock: + rcu_read_unlock(); +out: + NAPI_GRO_CB(skb)->flush |= flush; + + return pp; } -static int fou4_gro_complete(struct sk_buff *skb, int nhoff) +static int gue_gro_complete(struct sk_buff *skb, int nhoff) { - return fou_gro_complete(skb, nhoff, inet_offloads); -} + const struct net_offload **offloads; + struct guehdr *guehdr = (struct guehdr *)(skb->data + nhoff); + const struct net_offload *ops; + unsigned int guehlen; + u8 proto; + int err = -ENOENT; -static struct sk_buff **fou6_gro_receive(struct sk_buff **head, - struct sk_buff *skb) -{ - return fou_gro_receive(head, skb, inet6_offloads); -} + proto = guehdr->next_hdr; -static int fou6_gro_complete(struct sk_buff *skb, int nhoff) -{ - return fou_gro_complete(skb, nhoff, inet6_offloads); + guehlen = sizeof(*guehdr) + (guehdr->hlen << 2); + + rcu_read_lock(); + offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; + ops = rcu_dereference(offloads[proto]); + if (WARN_ON(!ops || !ops->callbacks.gro_complete)) + goto out_unlock; + + err = ops->callbacks.gro_complete(skb, nhoff + guehlen); + +out_unlock: + rcu_read_unlock(); + return err; } static int fou_add_to_port_list(struct fou *fou) @@ -162,6 +285,28 @@ static void fou_release(struct fou *fou) kfree(fou); } +static int fou_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg) +{ + udp_sk(sk)->encap_rcv = fou_udp_recv; + fou->protocol = cfg->protocol; + fou->udp_offloads.callbacks.gro_receive = fou_gro_receive; + fou->udp_offloads.callbacks.gro_complete = fou_gro_complete; + fou->udp_offloads.port = cfg->udp_config.local_udp_port; + fou->udp_offloads.ipproto = cfg->protocol; + + return 0; +} + +static int gue_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg) +{ + udp_sk(sk)->encap_rcv = gue_udp_recv; + fou->udp_offloads.callbacks.gro_receive = gue_gro_receive; + fou->udp_offloads.callbacks.gro_complete = gue_gro_complete; + fou->udp_offloads.port = cfg->udp_config.local_udp_port; + + return 0; +} + static int fou_create(struct net *net, struct fou_cfg *cfg, struct socket **sockp) { @@ -184,10 +329,24 @@ static int fou_create(struct net *net, struct fou_cfg *cfg, sk = sock->sk; - /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */ - fou->protocol = cfg->protocol; - fou->port = cfg->udp_config.local_udp_port; - udp_sk(sk)->encap_rcv = fou_udp_recv; + fou->port = cfg->udp_config.local_udp_port; + + /* Initial for fou type */ + switch (cfg->type) { + case FOU_ENCAP_DIRECT: + err = fou_encap_init(sk, fou, cfg); + if (err) + goto error; + break; + case FOU_ENCAP_GUE: + err = gue_encap_init(sk, fou, cfg); + if (err) + goto error; + break; + default: + err = -EINVAL; + goto error; + } udp_sk(sk)->encap_type = 1; udp_encap_enable(); @@ -199,23 +358,6 @@ static int fou_create(struct net *net, struct fou_cfg *cfg, sk->sk_allocation = GFP_ATOMIC; - switch (cfg->udp_config.family) { - case AF_INET: - fou->udp_offloads.callbacks.gro_receive = fou4_gro_receive; - fou->udp_offloads.callbacks.gro_complete = fou4_gro_complete; - break; - case AF_INET6: - fou->udp_offloads.callbacks.gro_receive = fou6_gro_receive; - fou->udp_offloads.callbacks.gro_complete = fou6_gro_complete; - break; - default: - err = -EPFNOSUPPORT; - goto error; - } - - fou->udp_offloads.port = cfg->udp_config.local_udp_port; - fou->udp_offloads.ipproto = cfg->protocol; - if (cfg->udp_config.family == AF_INET) { err = udp_add_offload(&fou->udp_offloads); if (err) @@ -272,6 +414,7 @@ static struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = { [FOU_ATTR_PORT] = { .type = NLA_U16, }, [FOU_ATTR_AF] = { .type = NLA_U8, }, [FOU_ATTR_IPPROTO] = { .type = NLA_U8, }, + [FOU_ATTR_TYPE] = { .type = NLA_U8, }, }; static int parse_nl_config(struct genl_info *info, @@ -299,6 +442,9 @@ static int parse_nl_config(struct genl_info *info, if (info->attrs[FOU_ATTR_IPPROTO]) cfg->protocol = nla_get_u8(info->attrs[FOU_ATTR_IPPROTO]); + if (info->attrs[FOU_ATTR_TYPE]) + cfg->type = nla_get_u8(info->attrs[FOU_ATTR_TYPE]); + return 0; } diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 2272de90c2d4..0bb8e141eacc 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -56,6 +56,7 @@ #include #include #include +#include #if IS_ENABLED(CONFIG_IPV6) #include @@ -495,6 +496,8 @@ static int ip_encap_hlen(struct ip_tunnel_encap *e) return 0; case TUNNEL_ENCAP_FOU: return sizeof(struct udphdr); + case TUNNEL_ENCAP_GUE: + return sizeof(struct udphdr) + sizeof(struct guehdr); default: return -EINVAL; } @@ -546,6 +549,15 @@ static int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, skb_reset_transport_header(skb); uh = udp_hdr(skb); + if (e->type == TUNNEL_ENCAP_GUE) { + struct guehdr *guehdr = (struct guehdr *)&uh[1]; + + guehdr->version = 0; + guehdr->hlen = 0; + guehdr->flags = 0; + guehdr->next_hdr = *protocol; + } + uh->dest = e->dport; uh->source = sport; uh->len = htons(skb->len); @@ -565,6 +577,7 @@ int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t, case TUNNEL_ENCAP_NONE: return 0; case TUNNEL_ENCAP_FOU: + case TUNNEL_ENCAP_GUE: return fou_build_header(skb, &t->encap, t->encap_hlen, protocol, fl4); default: @@ -759,7 +772,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, df |= (inner_iph->frag_off&htons(IP_DF)); max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr) - + rt->dst.header_len; + + rt->dst.header_len + ip_encap_hlen(&tunnel->encap); if (max_headroom > dev->needed_headroom) dev->needed_headroom = max_headroom; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 8c35f2c939ee..507310ef4b56 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -334,6 +334,7 @@ static struct sk_buff **udp4_gro_receive(struct sk_buff **head, skb_gro_checksum_try_convert(skb, IPPROTO_UDP, uh->check, inet_gro_compute_pseudo); skip: + NAPI_GRO_CB(skb)->is_ipv6 = 0; return udp_gro_receive(head, skb, uh); flush: diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 8f96988c1db2..6b8f543f6ac6 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -140,6 +140,7 @@ static struct sk_buff **udp6_gro_receive(struct sk_buff **head, ip6_gro_compute_pseudo); skip: + NAPI_GRO_CB(skb)->is_ipv6 = 1; return udp_gro_receive(head, skb, uh); flush: