In the conntrack hook it may not always be the case that: skb_network_header(skb) == skb->data, i.e. skb_network_offset(skb) is zero. This is problematic when L4 function nf_conntrack_handle_packet() is accessing L3 data. This function uses thoff and ip_hdr() to find its data. But it also calculates the checksum. nf_checksum() and nf_checksum_partial() both use lower skb-checksum functions that are based on using skb->data. Adjust for skb_network_offset(skb), so that the checksum is calculated correctly. Signed-off-by: Eric Woudstra --- net/netfilter/utils.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/net/netfilter/utils.c b/net/netfilter/utils.c index 008419db815a..7b33fe63c5fa 100644 --- a/net/netfilter/utils.c +++ b/net/netfilter/utils.c @@ -124,16 +124,25 @@ __sum16 nf_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u8 protocol, unsigned short family) { + unsigned int nhpull = skb_network_offset(skb); __sum16 csum = 0; + if (WARN_ON(!skb_pointer_if_linear(skb, nhpull, 0))) + return 0; + + /* pull/push because the lower csum functions assume that + * skb_network_offset(skb) is zero. 
+ */ + __skb_pull(skb, nhpull); switch (family) { case AF_INET: - csum = nf_ip_checksum(skb, hook, dataoff, protocol); + csum = nf_ip_checksum(skb, hook, dataoff - nhpull, protocol); break; case AF_INET6: - csum = nf_ip6_checksum(skb, hook, dataoff, protocol); + csum = nf_ip6_checksum(skb, hook, dataoff - nhpull, protocol); break; } + __skb_push(skb, nhpull); return csum; } @@ -143,18 +152,25 @@ __sum16 nf_checksum_partial(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, unsigned int len, u8 protocol, unsigned short family) { + unsigned int nhpull = skb_network_offset(skb); __sum16 csum = 0; + if (WARN_ON(!skb_pointer_if_linear(skb, nhpull, 0))) + return 0; + + /* See nf_checksum() */ + __skb_pull(skb, nhpull); switch (family) { case AF_INET: - csum = nf_ip_checksum_partial(skb, hook, dataoff, len, - protocol); + csum = nf_ip_checksum_partial(skb, hook, dataoff - nhpull, + len, protocol); break; case AF_INET6: - csum = nf_ip6_checksum_partial(skb, hook, dataoff, len, - protocol); + csum = nf_ip6_checksum_partial(skb, hook, dataoff - nhpull, + len, protocol); break; } + __skb_push(skb, nhpull); return csum; } -- 2.50.0 This adds the capability to conntrack 802.1ad, QinQ, PPPoE and PPPoE-in-Q packets that are passing a bridge, only when a conntrack zone is set. Signed-off-by: Eric Woudstra --- net/bridge/netfilter/nf_conntrack_bridge.c | 97 ++++++++++++++++++---- 1 file changed, 80 insertions(+), 17 deletions(-) diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c index 6482de4d8750..d3745af60f3a 100644 --- a/net/bridge/netfilter/nf_conntrack_bridge.c +++ b/net/bridge/netfilter/nf_conntrack_bridge.c @@ -237,58 +237,121 @@ static int nf_ct_br_ipv6_check(const struct sk_buff *skb) return 0; } +/** + * nf_ct_bridge_pre_inner - advances network_header to the header that follows + * the pppoe- or vlan-header. 
+ */ + +static int nf_ct_bridge_pre_inner(struct sk_buff *skb, __be16 *proto, u32 *len) +{ + switch (*proto) { + case htons(ETH_P_PPP_SES): { + struct ppp_hdr { + struct pppoe_hdr hdr; + __be16 proto; + } *ph; + + if (!pskb_may_pull(skb, PPPOE_SES_HLEN)) + return -1; + ph = (struct ppp_hdr *)(skb->data); + switch (ph->proto) { + case htons(PPP_IP): + *proto = htons(ETH_P_IP); + *len = ntohs(ph->hdr.length) - 2; + skb_set_network_header(skb, PPPOE_SES_HLEN); + return PPPOE_SES_HLEN; + case htons(PPP_IPV6): + *proto = htons(ETH_P_IPV6); + *len = ntohs(ph->hdr.length) - 2; + skb_set_network_header(skb, PPPOE_SES_HLEN); + return PPPOE_SES_HLEN; + } + break; + } + case htons(ETH_P_8021Q): { + struct vlan_hdr *vhdr; + + if (!pskb_may_pull(skb, VLAN_HLEN)) + return -1; + vhdr = (struct vlan_hdr *)(skb->data); + *proto = vhdr->h_vlan_encapsulated_proto; + skb_set_network_header(skb, VLAN_HLEN); + return VLAN_HLEN; + } + } + return 0; +} + static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct nf_hook_state bridge_state = *state; + int ret = NF_ACCEPT, offset = 0; enum ip_conntrack_info ctinfo; + u32 len, pppoe_len = 0; struct nf_conn *ct; - u32 len; - int ret; + __be16 proto; ct = nf_ct_get(skb, &ctinfo); if ((ct && !nf_ct_is_template(ct)) || ctinfo == IP_CT_UNTRACKED) return NF_ACCEPT; - switch (skb->protocol) { - case htons(ETH_P_IP): - if (!pskb_may_pull(skb, sizeof(struct iphdr))) + proto = skb->protocol; + + if (ct && nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo)) != + NF_CT_DEFAULT_ZONE_ID) { + offset = nf_ct_bridge_pre_inner(skb, &proto, &pppoe_len); + if (offset < 0) return NF_ACCEPT; + } + + switch (proto) { + case htons(ETH_P_IP): + if (!pskb_may_pull(skb, offset + sizeof(struct iphdr))) + goto do_not_track; len = skb_ip_totlen(skb); - if (pskb_trim_rcsum(skb, len)) - return NF_ACCEPT; + if (pppoe_len && pppoe_len != len) + goto do_not_track; + if (pskb_trim_rcsum(skb, offset + len)) + goto 
do_not_track; if (nf_ct_br_ip_check(skb)) - return NF_ACCEPT; + goto do_not_track; bridge_state.pf = NFPROTO_IPV4; ret = nf_ct_br_defrag4(skb, &bridge_state); break; case htons(ETH_P_IPV6): - if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) - return NF_ACCEPT; + if (!pskb_may_pull(skb, offset + sizeof(struct ipv6hdr))) + goto do_not_track; len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len); - if (pskb_trim_rcsum(skb, len)) - return NF_ACCEPT; + if (pppoe_len && pppoe_len != len) + goto do_not_track; + if (pskb_trim_rcsum(skb, offset + len)) + goto do_not_track; if (nf_ct_br_ipv6_check(skb)) - return NF_ACCEPT; + goto do_not_track; bridge_state.pf = NFPROTO_IPV6; ret = nf_ct_br_defrag6(skb, &bridge_state); break; default: nf_ct_set(skb, NULL, IP_CT_UNTRACKED); - return NF_ACCEPT; + goto do_not_track; } - if (ret != NF_ACCEPT) - return ret; + if (ret == NF_ACCEPT) + ret = nf_conntrack_in(skb, &bridge_state); + +do_not_track: + if (offset && ret == NF_ACCEPT) + skb_reset_network_header(skb); - return nf_conntrack_in(skb, &bridge_state); + return ret; } static unsigned int nf_ct_bridge_in(void *priv, struct sk_buff *skb, -- 2.50.0 This adds the capability to evaluate 802.1ad, QinQ, PPPoE and PPPoE-in-Q packets in the bridge filter chain. 
Signed-off-by: Eric Woudstra --- include/net/netfilter/nf_tables.h | 48 +++++++++++++++++++++++++++++++ net/netfilter/nft_chain_filter.c | 17 +++++++++-- 2 files changed, 62 insertions(+), 3 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index e2128663b160..4a55972881b1 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -88,6 +89,53 @@ static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt) pkt->fragoff = 0; } +/** + * nft_set_bridge_pktinfo - calls nft_set_pktinfo and advances + * network_header to the header that follows the pppoe- or vlan-header. + */ +static inline int nft_set_bridge_pktinfo(struct nft_pktinfo *pkt, + struct sk_buff *skb, + const struct nf_hook_state *state, + __be16 *proto) +{ + nft_set_pktinfo(pkt, skb, state); + + switch (*proto) { + case htons(ETH_P_PPP_SES): { + struct ppp_hdr { + struct pppoe_hdr hdr; + __be16 proto; + } *ph; + + if (!pskb_may_pull(skb, PPPOE_SES_HLEN)) + return -1; + ph = (struct ppp_hdr *)(skb->data); + switch (ph->proto) { + case htons(PPP_IP): + *proto = htons(ETH_P_IP); + skb_set_network_header(skb, PPPOE_SES_HLEN); + return PPPOE_SES_HLEN; + case htons(PPP_IPV6): + *proto = htons(ETH_P_IPV6); + skb_set_network_header(skb, PPPOE_SES_HLEN); + return PPPOE_SES_HLEN; + } + break; + } + case htons(ETH_P_8021Q): { + struct vlan_hdr *vhdr; + + if (!pskb_may_pull(skb, VLAN_HLEN)) + return -1; + vhdr = (struct vlan_hdr *)(skb->data); + *proto = vhdr->h_vlan_encapsulated_proto; + skb_set_network_header(skb, VLAN_HLEN); + return VLAN_HLEN; + } + } + return 0; +} + /** * struct nft_verdict - nf_tables verdict * diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c index b16185e9a6dd..a5174adb1abc 100644 --- a/net/netfilter/nft_chain_filter.c +++ b/net/netfilter/nft_chain_filter.c @@ -233,10 +233,16 @@ 
nft_do_chain_bridge(void *priv, const struct nf_hook_state *state) { struct nft_pktinfo pkt; + int ret, offset; + __be16 proto; - nft_set_pktinfo(&pkt, skb, state); + proto = eth_hdr(skb)->h_proto; + + offset = nft_set_bridge_pktinfo(&pkt, skb, state, &proto); + if (offset < 0) + return NF_ACCEPT; - switch (eth_hdr(skb)->h_proto) { + switch (proto) { case htons(ETH_P_IP): nft_set_pktinfo_ipv4_validate(&pkt); break; @@ -248,7 +254,12 @@ nft_do_chain_bridge(void *priv, break; } - return nft_do_chain(&pkt, priv); + ret = nft_do_chain(&pkt, priv); + + if (offset && ret == NF_ACCEPT) + skb_reset_network_header(skb); + + return ret; } static const struct nft_chain_type nft_chain_filter_bridge = { -- 2.50.0