Store the vid used on the bridge in the flow_offload_tuple, so it can
be used later to identify fdb entries that relate to the tuple.

The bridge_vid member is added to the structures nft_forward_info,
nf_flow_route and flow_offload_tuple. It can now be passed from
net_device_path->bridge.vlan_id to flow_offload_tuple->out.bridge_vid.

Signed-off-by: Eric Woudstra
---
 include/net/netfilter/nf_flow_table.h | 2 ++
 net/netfilter/nf_flow_table_core.c    | 1 +
 net/netfilter/nft_flow_offload.c      | 3 +++
 3 files changed, 6 insertions(+)

diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index c003cd194fa2..bac3b0e9e3a1 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -146,6 +146,7 @@ struct flow_offload_tuple {
 		struct {
 			u32 ifidx;
 			u32 hw_ifidx;
+			u16 bridge_vid;
 			u8 h_source[ETH_ALEN];
 			u8 h_dest[ETH_ALEN];
 		} out;
@@ -212,6 +213,7 @@ struct nf_flow_route {
 		struct {
 			u32 ifindex;
 			u32 hw_ifindex;
+			u16 bridge_vid;
 			u8 h_source[ETH_ALEN];
 			u8 h_dest[ETH_ALEN];
 		} out;
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 9441ac3d8c1a..992958db4a19 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -128,6 +128,7 @@ static int flow_offload_fill_route(struct flow_offload *flow,
 		       ETH_ALEN);
 		flow_tuple->out.ifidx = route->tuple[dir].out.ifindex;
 		flow_tuple->out.hw_ifidx = route->tuple[dir].out.hw_ifindex;
+		flow_tuple->out.bridge_vid = route->tuple[dir].out.bridge_vid;
 		dst_release(dst);
 		break;
 	case FLOW_OFFLOAD_XMIT_XFRM:
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index 2148c4cde9e4..788bffbfac78 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -85,6 +85,7 @@ struct nft_forward_info {
 		__u16 id;
 		__be16 proto;
 	} encap[NF_FLOW_TABLE_ENCAP_MAX];
+	u16 bridge_vid;
 	u8 num_encaps;
 	u8 ingress_vlans;
 	u8 h_source[ETH_ALEN];
@@ -159,6 +160,7 @@ static void nft_dev_path_info(const struct net_device_path_stack *stack,
 			case DEV_PATH_BR_VLAN_KEEP:
 				break;
 			}
+			info->bridge_vid = path->bridge.vlan_id;
 			info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
 			break;
 		default:
@@ -223,6 +225,7 @@ static void nft_dev_forward_path(struct nf_flow_route *route,
 		memcpy(route->tuple[dir].out.h_dest, info.h_dest, ETH_ALEN);
 		route->tuple[dir].out.ifindex = info.outdev->ifindex;
 		route->tuple[dir].out.hw_ifindex = info.hw_outdev->ifindex;
+		route->tuple[dir].out.bridge_vid = info.bridge_vid;
 		route->tuple[dir].xmit_type = info.xmit_type;
 	}
 }
-- 
2.50.0
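To illustrate what the stored vid enables, here is a minimal sketch, not
part of this series, of how a later consumer could re-validate a cached
direct-xmit tuple against the bridge fdb. flow_tuple_fdb_still_valid()
is a hypothetical helper, br_dev (the bridge master the flow traverses)
is an assumption of the sketch, and br_fdb_find_port() is an existing
bridge export that requires RTNL to be held:

#include <linux/if_bridge.h>
#include <net/netfilter/nf_flow_table.h>

/* Hypothetical helper, not part of this patch. Caller must hold
 * rtnl_lock(), since br_fdb_find_port() asserts RTNL.
 */
static bool flow_tuple_fdb_still_valid(const struct net_device *br_dev,
				       const struct flow_offload_tuple *tuple)
{
	struct net_device *port;

	if (tuple->xmit_type != FLOW_OFFLOAD_XMIT_DIRECT)
		return true;	/* no bridge forwarding info cached */

	/* Look up the fdb entry for the cached destination MAC and the
	 * vid stored by this patch.
	 */
	port = br_fdb_find_port(br_dev, tuple->out.h_dest,
				tuple->out.bridge_vid);

	/* Still valid only if the fdb points at the cached output port. */
	return port && port->ifindex == tuple->out.ifidx;
}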
In case of a bridge in the forward-fastpath or bridge-fastpath, the fdb
is used to create the tuple. In case of roaming at layer 2, for example
802.11r, the destination device is changed in the fdb. The destination
device of a direct-transmitting tuple is then no longer valid and
traffic is sent to the wrong destination. The hardware-offloaded
fastpath is no longer valid either.

In case of roaming, a switchdev notification is sent to delete the old
fdb entry. Upon receiving this notification, mark all direct-transmitting
flows with the same ifindex, vid and hardware address as the fdb entry
to be torn down.

The hardware-offloaded fastpath is still in effect at that point, so
minimize the time it stays active by scheduling the flowtable gc work
with zero delay.

Signed-off-by: Eric Woudstra
---
 net/netfilter/nf_flow_table_core.c | 88 ++++++++++++++++++++++++++++++
 1 file changed, 88 insertions(+)

diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 992958db4a19..e906f93f4abb 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -13,6 +13,7 @@
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/switchdev.h>
 
 static DEFINE_MUTEX(flowtable_lock);
 static LIST_HEAD(flowtables);
@@ -745,6 +746,86 @@ void nf_flow_table_cleanup(struct net_device *dev)
 }
 EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
 
+struct flow_cleanup_data {
+	int ifindex;
+	u16 vid;
+	u8 addr[ETH_ALEN];
+	bool found;
+};
+
+struct flow_switchdev_event_work {
+	struct work_struct work;
+	struct flow_cleanup_data cud;
+};
+
+static void nf_flow_table_do_cleanup_addr(struct nf_flowtable *flow_table,
+					  struct flow_offload *flow, void *data)
+{
+	struct flow_cleanup_data *cud = data;
+
+	if ((flow->tuplehash[0].tuple.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT &&
+	     flow->tuplehash[0].tuple.out.ifidx == cud->ifindex &&
+	     flow->tuplehash[0].tuple.out.bridge_vid == cud->vid &&
+	     ether_addr_equal(flow->tuplehash[0].tuple.out.h_dest, cud->addr)) ||
+	    (flow->tuplehash[1].tuple.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT &&
+	     flow->tuplehash[1].tuple.out.ifidx == cud->ifindex &&
+	     flow->tuplehash[1].tuple.out.bridge_vid == cud->vid &&
+	     ether_addr_equal(flow->tuplehash[1].tuple.out.h_dest, cud->addr))) {
+		flow_offload_teardown(flow);
+		cud->found = true;
+	}
+}
+
+static void nf_flow_table_switchdev_event_work(struct work_struct *work)
+{
+	struct flow_switchdev_event_work *switchdev_work =
+		container_of(work, struct flow_switchdev_event_work, work);
+	struct nf_flowtable *flowtable;
+
+	mutex_lock(&flowtable_lock);
+
+	list_for_each_entry(flowtable, &flowtables, list) {
+		switchdev_work->cud.found = false;
+		nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup_addr,
+				      &switchdev_work->cud);
+		if (switchdev_work->cud.found)
+			mod_delayed_work(system_power_efficient_wq,
+					 &flowtable->gc_work, 0);
+	}
+
+	mutex_unlock(&flowtable_lock);
+
+	kfree(switchdev_work);
+}
+
+static int nf_flow_table_switchdev_event(struct notifier_block *unused,
+					 unsigned long event, void *ptr)
+{
+	struct flow_switchdev_event_work *switchdev_work;
+	struct switchdev_notifier_fdb_info *fdb_info;
+
+	if (event != SWITCHDEV_FDB_DEL_TO_DEVICE)
+		return NOTIFY_DONE;
+
+	switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC);
+	if (WARN_ON(!switchdev_work))
+		return NOTIFY_BAD;
+
+	INIT_WORK(&switchdev_work->work, nf_flow_table_switchdev_event_work);
+	fdb_info = ptr;
+	switchdev_work->cud.ifindex = fdb_info->info.dev->ifindex;
+	switchdev_work->cud.vid = fdb_info->vid;
+	ether_addr_copy(switchdev_work->cud.addr, fdb_info->addr);
+
+	queue_work(system_long_wq, &switchdev_work->work);
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block nf_flow_table_switchdev_nb __read_mostly = {
+	.notifier_call = nf_flow_table_switchdev_event,
+};
+
 void nf_flow_table_free(struct nf_flowtable *flow_table)
 {
 	mutex_lock(&flowtable_lock);
@@ -818,6 +899,10 @@ static int __init nf_flow_table_module_init(void)
 	if (ret)
 		goto out_offload;
 
+	ret = register_switchdev_notifier(&nf_flow_table_switchdev_nb);
+	if (ret < 0)
+		goto out_sw_noti;
+
 	ret = nf_flow_register_bpf();
 	if (ret)
 		goto out_bpf;
@@ -825,6 +910,8 @@ static int __init nf_flow_table_module_init(void)
 	return 0;
 
 out_bpf:
+	unregister_switchdev_notifier(&nf_flow_table_switchdev_nb);
+out_sw_noti:
 	nf_flow_table_offload_exit();
 out_offload:
 	unregister_pernet_subsys(&nf_flow_table_net_ops);
@@ -833,6 +920,7 @@ static int __init nf_flow_table_module_init(void)
 
 static void __exit nf_flow_table_module_exit(void)
 {
+	unregister_switchdev_notifier(&nf_flow_table_switchdev_nb);
 	nf_flow_table_offload_exit();
 	unregister_pernet_subsys(&nf_flow_table_net_ops);
 }
-- 
2.50.0
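For context on the receive side above, a minimal sketch of how the event
this notifier consumes is emitted. In the kernel the actual sender is
the bridge fdb code, via call_switchdev_notifiers();
example_fdb_del_notify() and its port_dev/mac/vid parameters are
hypothetical placeholders, shown only to make the notification flow
concrete:

#include <linux/netdevice.h>
#include <net/switchdev.h>

/* Hypothetical test sender, not from this series. */
static void example_fdb_del_notify(struct net_device *port_dev,
				   const unsigned char *mac, u16 vid)
{
	struct switchdev_notifier_fdb_info fdb_info = {
		.addr = mac,
		.vid = vid,
	};

	/* Runs the atomic notifier chain; the flowtable handler above
	 * queues a work item and tears down matching flows from
	 * process context.
	 */
	call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_DEVICE, port_dev,
				 &fdb_info.info, NULL);
}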