netfilter: exthdr: tcp option set support

This allows setting 2 and 4 byte quantities in the tcp option space.
Main purpose is to allow native replacement for xt_TCPMSS to
work around pmtu blackholes.

Writes to kind and len are now allowed at the moment, it does not seem
useful to do this as it causes corruption of the tcp option space.

We can always lift this restriction later if a use-case appears.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
This commit is contained in:
Florian Westphal 2017-08-08 15:15:29 +02:00 committed by Pablo Neira Ayuso
parent 5e7d695a48
commit 99d1712bc4
2 changed files with 165 additions and 3 deletions

View File

@ -732,7 +732,8 @@ enum nft_exthdr_op {
* @NFTA_EXTHDR_OFFSET: extension header offset (NLA_U32)
* @NFTA_EXTHDR_LEN: extension header length (NLA_U32)
* @NFTA_EXTHDR_FLAGS: extension header flags (NLA_U32)
* @NFTA_EXTHDR_OP: option match type (NLA_U8)
* @NFTA_EXTHDR_OP: option match type (NLA_U32)
* @NFTA_EXTHDR_SREG: option match type (NLA_U32)
*/
enum nft_exthdr_attributes {
NFTA_EXTHDR_UNSPEC,
@ -742,6 +743,7 @@ enum nft_exthdr_attributes {
NFTA_EXTHDR_LEN,
NFTA_EXTHDR_FLAGS,
NFTA_EXTHDR_OP,
NFTA_EXTHDR_SREG,
__NFTA_EXTHDR_MAX
};
#define NFTA_EXTHDR_MAX (__NFTA_EXTHDR_MAX - 1)

View File

@ -8,6 +8,7 @@
* Development of this code funded by Astaro AG (http://www.astaro.com/)
*/
#include <asm/unaligned.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
@ -23,6 +24,7 @@ struct nft_exthdr {
u8 len;
u8 op;
enum nft_registers dreg:8;
enum nft_registers sreg:8;
u8 flags;
};
@ -124,6 +126,88 @@ err:
regs->verdict.code = NFT_BREAK;
}
static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
u8 buff[sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE];
struct nft_exthdr *priv = nft_expr_priv(expr);
unsigned int i, optl, tcphdr_len, offset;
struct tcphdr *tcph;
u8 *opt;
u32 src;
tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len);
if (!tcph)
return;
opt = (u8 *)tcph;
for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) {
union {
u8 octet;
__be16 v16;
__be32 v32;
} old, new;
optl = optlen(opt, i);
if (priv->type != opt[i])
continue;
if (i + optl > tcphdr_len || priv->len + priv->offset > optl)
return;
if (!skb_make_writable(pkt->skb, pkt->xt.thoff + i + priv->len))
return;
tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff,
&tcphdr_len);
if (!tcph)
return;
src = regs->data[priv->sreg];
offset = i + priv->offset;
switch (priv->len) {
case 2:
old.v16 = get_unaligned((u16 *)(opt + offset));
new.v16 = src;
switch (priv->type) {
case TCPOPT_MSS:
/* increase can cause connection to stall */
if (ntohs(old.v16) <= ntohs(new.v16))
return;
break;
}
if (old.v16 == new.v16)
return;
put_unaligned(new.v16, (u16*)(opt + offset));
inet_proto_csum_replace2(&tcph->check, pkt->skb,
old.v16, new.v16, false);
break;
case 4:
new.v32 = src;
old.v32 = get_unaligned((u32 *)(opt + offset));
if (old.v32 == new.v32)
return;
put_unaligned(new.v32, (u32*)(opt + offset));
inet_proto_csum_replace4(&tcph->check, pkt->skb,
old.v32, new.v32, false);
break;
default:
WARN_ON_ONCE(1);
break;
}
return;
}
}
static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = {
[NFTA_EXTHDR_DREG] = { .type = NLA_U32 },
[NFTA_EXTHDR_TYPE] = { .type = NLA_U8 },
@ -180,6 +264,55 @@ static int nft_exthdr_init(const struct nft_ctx *ctx,
NFT_DATA_VALUE, priv->len);
}
static int nft_exthdr_tcp_set_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
{
struct nft_exthdr *priv = nft_expr_priv(expr);
u32 offset, len, flags = 0, op = NFT_EXTHDR_OP_IPV6;
int err;
if (!tb[NFTA_EXTHDR_SREG] ||
!tb[NFTA_EXTHDR_TYPE] ||
!tb[NFTA_EXTHDR_OFFSET] ||
!tb[NFTA_EXTHDR_LEN])
return -EINVAL;
if (tb[NFTA_EXTHDR_DREG] || tb[NFTA_EXTHDR_FLAGS])
return -EINVAL;
err = nft_parse_u32_check(tb[NFTA_EXTHDR_OFFSET], U8_MAX, &offset);
if (err < 0)
return err;
err = nft_parse_u32_check(tb[NFTA_EXTHDR_LEN], U8_MAX, &len);
if (err < 0)
return err;
if (offset < 2)
return -EOPNOTSUPP;
switch (len) {
case 2: break;
case 4: break;
default:
return -EOPNOTSUPP;
}
err = nft_parse_u32_check(tb[NFTA_EXTHDR_OP], U8_MAX, &op);
if (err < 0)
return err;
priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]);
priv->offset = offset;
priv->len = len;
priv->sreg = nft_parse_register(tb[NFTA_EXTHDR_SREG]);
priv->flags = flags;
priv->op = op;
return nft_validate_register_load(priv->sreg, priv->len);
}
static int nft_exthdr_dump_common(struct sk_buff *skb, const struct nft_exthdr *priv)
{
if (nla_put_u8(skb, NFTA_EXTHDR_TYPE, priv->type))
@ -208,6 +341,16 @@ static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr)
return nft_exthdr_dump_common(skb, priv);
}
static int nft_exthdr_dump_set(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_exthdr *priv = nft_expr_priv(expr);
if (nft_dump_register(skb, NFTA_EXTHDR_SREG, priv->sreg))
return -1;
return nft_exthdr_dump_common(skb, priv);
}
static struct nft_expr_type nft_exthdr_type;
static const struct nft_expr_ops nft_exthdr_ipv6_ops = {
.type = &nft_exthdr_type,
@ -225,6 +368,14 @@ static const struct nft_expr_ops nft_exthdr_tcp_ops = {
.dump = nft_exthdr_dump,
};
static const struct nft_expr_ops nft_exthdr_tcp_set_ops = {
.type = &nft_exthdr_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
.eval = nft_exthdr_tcp_set_eval,
.init = nft_exthdr_tcp_set_init,
.dump = nft_exthdr_dump_set,
};
static const struct nft_expr_ops *
nft_exthdr_select_ops(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
@ -234,12 +385,21 @@ nft_exthdr_select_ops(const struct nft_ctx *ctx,
if (!tb[NFTA_EXTHDR_OP])
return &nft_exthdr_ipv6_ops;
if (tb[NFTA_EXTHDR_SREG] && tb[NFTA_EXTHDR_DREG])
return ERR_PTR(-EOPNOTSUPP);
op = ntohl(nla_get_u32(tb[NFTA_EXTHDR_OP]));
switch (op) {
case NFT_EXTHDR_OP_TCPOPT:
if (tb[NFTA_EXTHDR_SREG])
return &nft_exthdr_tcp_set_ops;
if (tb[NFTA_EXTHDR_DREG])
return &nft_exthdr_tcp_ops;
break;
case NFT_EXTHDR_OP_IPV6:
if (tb[NFTA_EXTHDR_DREG])
return &nft_exthdr_ipv6_ops;
break;
}
return ERR_PTR(-EOPNOTSUPP);