The new test exercises paths where RTNL is needed, to catch lockdep
splats:

  * setsockopt
    * MRT_INIT / MRT_DONE
    * MRT_ADD_VIF / MRT_DEL_VIF
    * MRT_ADD_MFC / MRT_DEL_MFC / MRT_ADD_MFC_PROXY / MRT_DEL_MFC_PROXY
    * MRT_TABLE
    * MRT_FLUSH

  * rtnetlink
    * RTM_NEWROUTE
    * RTM_DELROUTE

  * NETDEV_UNREGISTER

I will extend this to cover IPv6 setsockopt() later.

Signed-off-by: Kuniyuki Iwashima
---
Note: the header names of the eight angle-bracket #include lines were
lost in this copy of the patch.  The list below is reconstructed from
the identifiers the test uses; please confirm it against the original
posting.

 .../testing/selftests/net/forwarding/Makefile |   4 +
 tools/testing/selftests/net/forwarding/ipmr.c | 486 ++++++++++++++++++
 2 files changed, 490 insertions(+)
 create mode 100644 tools/testing/selftests/net/forwarding/ipmr.c

diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index ff4a00d91a26..bbaf4d937dd8 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -133,6 +133,10 @@ TEST_FILES := \
 	tc_common.sh \
 # end of TEST_FILES
 
+TEST_GEN_PROGS := \
+	ipmr
+# end of TEST_GEN_PROGS
+
 TEST_INCLUDES := \
 	$(wildcard ../lib/sh/*.sh) \
 	../lib.sh \
diff --git a/tools/testing/selftests/net/forwarding/ipmr.c b/tools/testing/selftests/net/forwarding/ipmr.c
new file mode 100644
index 000000000000..699d8237d226
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ipmr.c
@@ -0,0 +1,486 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2026 Google LLC */
+
+#include <linux/mroute.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/sockios.h>
+#include <net/if.h>
+#include <sched.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+
+#include "kselftest_harness.h"
+
+/* Per-test state, filled in by FIXTURE_SETUP(). */
+FIXTURE(ipmr)
+{
+	int netlink_sk;
+	int raw_sk;
+	int veth_ifindex;
+};
+
+/* Per-family parameters; opts[] is indexed by (MRT_xxx - MRT_BASE). */
+FIXTURE_VARIANT(ipmr)
+{
+	int family;
+	int protocol;
+	int level;
+	int opts[MRT_MAX - MRT_BASE + 1];
+};
+
+FIXTURE_VARIANT_ADD(ipmr, ipv4)
+{
+	.family = AF_INET,
+	.protocol = IPPROTO_IGMP,
+	.level = IPPROTO_IP,
+	.opts = {
+		MRT_INIT,
+		MRT_DONE,
+		MRT_ADD_VIF,
+		MRT_DEL_VIF,
+		MRT_ADD_MFC,
+		MRT_DEL_MFC,
+		MRT_VERSION,
+		MRT_ASSERT,
+		MRT_PIM,
+		MRT_TABLE,
+		MRT_ADD_MFC_PROXY,
+		MRT_DEL_MFC_PROXY,
+		MRT_FLUSH,
+	},
+};
+
+/* Attributes of the MFC entry to add or delete via rtnetlink. */
+struct mfc_attr {
+	int table;	/* RTA_TABLE */
+	__u32 origin;	/* RTA_SRC */
+	__u32 group;	/* RTA_DST */
+	int ifindex;	/* RTA_IIF, omitted when 0 */
+	bool proxy;	/* append an empty RTA_PREFSRC when true */
+};
+
+/*
+ * Append one attribute at @rta and account for it in @nlmsg->nlmsg_len.
+ * @data may be NULL when @len is 0.  The caller must make sure the buffer
+ * behind @rta is large enough.
+ *
+ * Return: the aligned position where the next attribute goes.
+ */
+static struct rtattr *nl_add_rtattr(struct nlmsghdr *nlmsg, struct rtattr *rta,
+				    int type, const void *data, int len)
+{
+	rta->rta_type = type;
+	rta->rta_len = RTA_LENGTH(len);
+	/* memcpy() must not be given a NULL source, even for 0 bytes. */
+	if (len)
+		memcpy(RTA_DATA(rta), data, len);
+
+	nlmsg->nlmsg_len += NLMSG_ALIGN(rta->rta_len);
+
+	/* Same pointer as RTA_NEXT(), without its length bookkeeping. */
+	return (struct rtattr *)((char *)rta + RTA_ALIGN(rta->rta_len));
+}
+
+/*
+ * Send an RTM_NEWROUTE / RTM_DELROUTE request for @mfc_attr and wait for
+ * the ACK.
+ *
+ * Return: the error field of the netlink ACK, i.e. 0 or a negative errno.
+ */
+static int nl_sendmsg_mfc(struct __test_metadata *_metadata, FIXTURE_DATA(ipmr) *self,
+			  __u16 nlmsg_type, struct mfc_attr *mfc_attr)
+{
+	struct {
+		struct nlmsghdr nlmsg;
+		struct rtmsg rtm;
+		char buf[4096];
+	} req = {
+		.nlmsg = {
+			/* ipmr does not care about NLM_F_CREATE and NLM_F_EXCL ... */
+			.nlmsg_len = NLMSG_LENGTH(sizeof(req.rtm)),
+			.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
+			.nlmsg_type = nlmsg_type,
+		},
+		.rtm = {
+			/* hard requirements in rtm_to_ipmr_mfcc() */
+			.rtm_family = RTNL_FAMILY_IPMR,
+			.rtm_dst_len = 32,
+			.rtm_type = RTN_MULTICAST,
+			.rtm_scope = RT_SCOPE_UNIVERSE,
+			.rtm_protocol = RTPROT_MROUTED,
+		},
+	};
+	struct nlmsghdr *nlmsg = &req.nlmsg;
+	struct nlmsgerr *errmsg;
+	struct rtattr *rta;
+	int err;
+
+	rta = (struct rtattr *)req.buf;
+	rta = nl_add_rtattr(nlmsg, rta, RTA_TABLE, &mfc_attr->table, sizeof(mfc_attr->table));
+	rta = nl_add_rtattr(nlmsg, rta, RTA_SRC, &mfc_attr->origin, sizeof(mfc_attr->origin));
+	rta = nl_add_rtattr(nlmsg, rta, RTA_DST, &mfc_attr->group, sizeof(mfc_attr->group));
+	if (mfc_attr->ifindex)
+		rta = nl_add_rtattr(nlmsg, rta, RTA_IIF, &mfc_attr->ifindex, sizeof(mfc_attr->ifindex));
+	if (mfc_attr->proxy)
+		rta = nl_add_rtattr(nlmsg, rta, RTA_PREFSRC, NULL, 0);
+
+	err = send(self->netlink_sk, &req, req.nlmsg.nlmsg_len, 0);
+	ASSERT_EQ(req.nlmsg.nlmsg_len, err);
+
+	memset(&req, 0, sizeof(req));
+
+	err = recv(self->netlink_sk, &req, sizeof(req), 0);
+	ASSERT_TRUE(NLMSG_OK(nlmsg, err));
+	ASSERT_EQ(NLMSG_ERROR, nlmsg->nlmsg_type);
+
+	errmsg = (struct nlmsgerr *)NLMSG_DATA(nlmsg);
+	return errmsg->error;
+}
+
+/* Run every test in a new netns with its own sockets and veth pair. */
+FIXTURE_SETUP(ipmr)
+{
+	struct ifreq ifr = {
+		.ifr_name = "veth0",
+	};
+	int err;
+
+	err = unshare(CLONE_NEWNET);
+	ASSERT_EQ(0, err);
+
+	self->netlink_sk = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+	ASSERT_LE(0, self->netlink_sk);
+
+	self->raw_sk = socket(variant->family, SOCK_RAW, variant->protocol);
+	ASSERT_LE(0, self->raw_sk);
+
+	err = system("ip link add veth0 type veth peer veth1");
+	ASSERT_EQ(0, err);
+
+	err = ioctl(self->raw_sk, SIOCGIFINDEX, &ifr);
+	ASSERT_EQ(0, err);
+
+	self->veth_ifindex = ifr.ifr_ifindex;
+}
+
+FIXTURE_TEARDOWN(ipmr)
+{
+	close(self->raw_sk);
+	close(self->netlink_sk);
+}
+
+/* MRT_INIT followed by MRT_DONE on the same socket. */
+TEST_F(ipmr, mrt_init)
+{
+	int err, val = 0; /* any value is ok, but size must be int for MRT_INIT. */
+
+	err = setsockopt(self->raw_sk,
+			 variant->level, variant->opts[MRT_INIT - MRT_BASE],
+			 &val, sizeof(val));
+	ASSERT_EQ(0, err);
+
+	err = setsockopt(self->raw_sk,
+			 variant->level, variant->opts[MRT_DONE - MRT_BASE],
+			 &val, sizeof(val));
+	ASSERT_EQ(0, err);
+}
+
+/* Add and delete a VIFF_REGISTER VIF, expected to show up as pimreg. */
+TEST_F(ipmr, mrt_add_vif_register)
+{
+	struct vifctl vif = {
+		.vifc_vifi = 0,
+		.vifc_flags = VIFF_REGISTER,
+	};
+	int err;
+
+	err = setsockopt(self->raw_sk,
+			 variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE],
+			 &vif, sizeof(vif));
+	ASSERT_EQ(0, err);
+
+	err = system("cat /proc/net/ip_mr_vif | grep -q pimreg");
+	ASSERT_EQ(0, err);
+
+	err = setsockopt(self->raw_sk,
+			 variant->level, variant->opts[MRT_DEL_VIF - MRT_BASE],
+			 &vif, sizeof(vif));
+	ASSERT_EQ(0, err);
+}
+
+/* Delete the underlying device first, then try MRT_DEL_VIF. */
+TEST_F(ipmr, mrt_del_vif_unreg)
+{
+	struct vifctl vif = {
+		.vifc_vifi = 0,
+		.vifc_flags = VIFF_USE_IFINDEX,
+		.vifc_lcl_ifindex = self->veth_ifindex,
+	};
+	int err;
+
+	err = setsockopt(self->raw_sk,
+			 variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE],
+			 &vif, sizeof(vif));
+	ASSERT_EQ(0, err);
+
+	err = system("cat /proc/net/ip_mr_vif | grep -q veth0");
+	ASSERT_EQ(0, err);
+
+	/* VIF is removed along with its device. */
+	err = system("ip link del veth0");
+	ASSERT_EQ(0, err);
+
+	/* mrt->vif_table[0] (vifc_vifi, not veth_ifindex) has no device anymore. */
+	err = setsockopt(self->raw_sk,
+			 variant->level, variant->opts[MRT_DEL_VIF - MRT_BASE],
+			 &vif, sizeof(vif));
+	ASSERT_EQ(-1, err);
+	ASSERT_EQ(EADDRNOTAVAIL, errno);
+}
+
+/* Leave a VIF behind so that netns teardown has to remove it. */
+TEST_F(ipmr, mrt_del_vif_netns_dismantle)
+{
+	struct vifctl vif = {
+		.vifc_vifi = 0,
+		.vifc_flags = VIFF_USE_IFINDEX,
+		.vifc_lcl_ifindex = self->veth_ifindex,
+	};
+	int err;
+
+	err = setsockopt(self->raw_sk,
+			 variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE],
+			 &vif, sizeof(vif));
+	ASSERT_EQ(0, err);
+
+	/* Let cleanup_net() remove veth0 and VIF. */
+}
+
+/* Add and delete an MFC entry with MRT_ADD_MFC / MRT_DEL_MFC. */
+TEST_F(ipmr, mrt_add_mfc)
+{
+	struct mfcctl mfc = {};
+	int err;
+
+	/* MRT_ADD_MFC / MRT_ADD_MFC_PROXY does not need vif to exist (unlike netlink). */
+	err = setsockopt(self->raw_sk,
+			 variant->level, variant->opts[MRT_ADD_MFC - MRT_BASE],
+			 &mfc, sizeof(mfc));
+	ASSERT_EQ(0, err);
+
+	/* (0.0.0.0 -> 0.0.0.0) */
+	err = system("cat /proc/net/ip_mr_cache | grep -q '00000000 00000000' ");
+	ASSERT_EQ(0, err);
+
+	err = setsockopt(self->raw_sk,
+			 variant->level, variant->opts[MRT_DEL_MFC - MRT_BASE],
+			 &mfc, sizeof(mfc));
+	ASSERT_EQ(0, err);
+}
+
+/* Add and delete an MFC entry with MRT_ADD_MFC_PROXY / MRT_DEL_MFC_PROXY. */
+TEST_F(ipmr, mrt_add_mfc_proxy)
+{
+	struct mfcctl mfc = {};
+	int err;
+
+	err = setsockopt(self->raw_sk,
+			 variant->level, variant->opts[MRT_ADD_MFC_PROXY - MRT_BASE],
+			 &mfc, sizeof(mfc));
+	ASSERT_EQ(0, err);
+
+	err = system("cat /proc/net/ip_mr_cache | grep -q '00000000 00000000' ");
+	ASSERT_EQ(0, err);
+
+	err = setsockopt(self->raw_sk,
+			 variant->level, variant->opts[MRT_DEL_MFC_PROXY - MRT_BASE],
+			 &mfc, sizeof(mfc));
+	ASSERT_EQ(0, err);
+}
+
+/* Add and delete an MFC entry with RTM_NEWROUTE / RTM_DELROUTE. */
+TEST_F(ipmr, mrt_add_mfc_netlink)
+{
+	struct vifctl vif = {
+		.vifc_vifi = 0,
+		.vifc_flags = VIFF_USE_IFINDEX,
+		.vifc_lcl_ifindex = self->veth_ifindex,
+	};
+	struct mfc_attr mfc_attr = {
+		.table = RT_TABLE_DEFAULT,
+		.origin = 0,
+		.group = 0,
+		.ifindex = self->veth_ifindex,
+		.proxy = false,
+	};
+	int err;
+
+	err = setsockopt(self->raw_sk,
+			 variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE],
+			 &vif, sizeof(vif));
+	ASSERT_EQ(0, err);
+
+	err = nl_sendmsg_mfc(_metadata, self, RTM_NEWROUTE, &mfc_attr);
+	ASSERT_EQ(0, err);
+
+	err = system("cat /proc/net/ip_mr_cache | grep -q '00000000 00000000' ");
+	ASSERT_EQ(0, err);
+
+	err = nl_sendmsg_mfc(_metadata, self, RTM_DELROUTE, &mfc_attr);
+	ASSERT_EQ(0, err);
+}
+
+/* Same as mrt_add_mfc_netlink, with the proxy attribute set. */
+TEST_F(ipmr, mrt_add_mfc_netlink_proxy)
+{
+	struct vifctl vif = {
+		.vifc_vifi = 0,
+		.vifc_flags = VIFF_USE_IFINDEX,
+		.vifc_lcl_ifindex = self->veth_ifindex,
+	};
+	struct mfc_attr mfc_attr = {
+		.table = RT_TABLE_DEFAULT,
+		.origin = 0,
+		.group = 0,
+		.ifindex = self->veth_ifindex,
+		.proxy = true,
+	};
+	int err;
+
+	err = setsockopt(self->raw_sk,
+			 variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE],
+			 &vif, sizeof(vif));
+	ASSERT_EQ(0, err);
+
+	err = nl_sendmsg_mfc(_metadata, self, RTM_NEWROUTE, &mfc_attr);
+	ASSERT_EQ(0, err);
+
+	err = system("cat /proc/net/ip_mr_cache | grep -q '00000000 00000000' ");
+	ASSERT_EQ(0, err);
+
+	err = nl_sendmsg_mfc(_metadata, self, RTM_DELROUTE, &mfc_attr);
+	ASSERT_EQ(0, err);
+}
+
+/* RTM_NEWROUTE is expected to fail with -ENFILE while no VIF exists. */
+TEST_F(ipmr, mrt_add_mfc_netlink_no_vif)
+{
+	struct mfc_attr mfc_attr = {
+		.table = RT_TABLE_DEFAULT,
+		.origin = 0,
+		.group = 0,
+		.proxy = false,
+	};
+	int err;
+
+	/* Case 1: RTA_IIF is not sent at all (ifindex 0 is skipped). */
+	mfc_attr.ifindex = 0;
+	err = nl_sendmsg_mfc(_metadata, self, RTM_NEWROUTE, &mfc_attr);
+	ASSERT_EQ(-ENFILE, err);
+
+	/* Case 2: RTA_IIF names veth0, which was never added as a VIF. */
+	mfc_attr.ifindex = self->veth_ifindex;
+	err = nl_sendmsg_mfc(_metadata, self, RTM_NEWROUTE, &mfc_attr);
+	ASSERT_EQ(-ENFILE, err);
+}
+
+/* Leave an MFC entry whose VIF lost its device for netns teardown. */
+TEST_F(ipmr, mrt_del_mfc_netlink_netns_dismantle)
+{
+	struct vifctl vifs[2] = {
+		{
+			.vifc_vifi = 0,
+			.vifc_flags = VIFF_USE_IFINDEX,
+			.vifc_lcl_ifindex = self->veth_ifindex,
+		},
+		{
+			.vifc_vifi = 1,
+			.vifc_flags = VIFF_REGISTER,
+		},
+	};
+	struct mfc_attr mfc_attr = {
+		.table = RT_TABLE_DEFAULT,
+		.origin = 0,
+		.group = 0,
+		.ifindex = self->veth_ifindex,
+		.proxy = false,
+	};
+	int i, err;
+
+	for (i = 0; i < 2; i++) {
+		/* Create 2 VIFs just to avoid -ENFILE later. */
+		err = setsockopt(self->raw_sk,
+				 variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE],
+				 &vifs[i], sizeof(vifs[i]));
+		ASSERT_EQ(0, err);
+	}
+
+	/* Create a MFC for mrt->vif_table[0]. */
+	err = nl_sendmsg_mfc(_metadata, self, RTM_NEWROUTE, &mfc_attr);
+	ASSERT_EQ(0, err);
+
+	err = system("cat /proc/net/ip_mr_cache | grep -q '00000000 00000000' ");
+	ASSERT_EQ(0, err);
+
+	/* Remove mrt->vif_table[0]. */
+	err = system("ip link del veth0");
+	ASSERT_EQ(0, err);
+
+	/* MFC entry is NOT removed even if the tied VIF is removed... */
+	err = system("cat /proc/net/ip_mr_cache | grep -q '00000000 00000000' ");
+	ASSERT_EQ(0, err);
+
+	/* ... and netlink is not capable of removing such an entry
+	 * because netlink always requires a valid RTA_IIF ... :/
+	 */
+	err = nl_sendmsg_mfc(_metadata, self, RTM_DELROUTE, &mfc_attr);
+	ASSERT_EQ(-ENODEV, err);
+
+	/* It can be removed by setsockopt(), but let cleanup_net() remove this time. */
+}
+
+/* Populate a non-default table, then flush all of it with MRT_FLUSH. */
+TEST_F(ipmr, mrt_table_flush)
+{
+	struct vifctl vif = {
+		.vifc_vifi = 0,
+		.vifc_flags = VIFF_USE_IFINDEX,
+		.vifc_lcl_ifindex = self->veth_ifindex,
+	};
+	struct mfc_attr mfc_attr = {
+		.origin = 0,
+		.group = 0,
+		.ifindex = self->veth_ifindex,
+		.proxy = false,
+	};
+	int table_id = 92;
+	int err, flags;
+
+	/* Set a random table id rather than RT_TABLE_DEFAULT.
+	 * Note that /proc/net/ip_mr_{vif,cache} only supports RT_TABLE_DEFAULT.
+	 */
+	err = setsockopt(self->raw_sk,
+			 variant->level, variant->opts[MRT_TABLE - MRT_BASE],
+			 &table_id, sizeof(table_id));
+	ASSERT_EQ(0, err);
+
+	err = setsockopt(self->raw_sk,
+			 variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE],
+			 &vif, sizeof(vif));
+	ASSERT_EQ(0, err);
+
+	mfc_attr.table = table_id;
+	err = nl_sendmsg_mfc(_metadata, self, RTM_NEWROUTE, &mfc_attr);
+	ASSERT_EQ(0, err);
+
+	/* Flush mrt->vif_table[] and all caches. */
+	flags = MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC |
+		MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC;
+	err = setsockopt(self->raw_sk,
+			 variant->level, variant->opts[MRT_FLUSH - MRT_BASE],
+			 &flags, sizeof(flags));
+	ASSERT_EQ(0, err);
+}
+
+TEST_HARNESS_MAIN
-- 
2.53.0.414.gf7e9f6c205-goog