There is a pattern of bugs that end up creating UAFs or null ptr derefs. The majority of these bugs follow the formula below: a) create a nonsense hierarchy of qdiscs which has no practical value, b) start sending packets Optional c) netlink cmds to change hierarchy some more; It's more fun if you can get packets stuck - the formula in this case includes non work-conserving qdiscs somewhere in the hierarchy Optional d dependent on c) send more packets e) profit Current init/change qdisc APIs are localised to validate only within the constraint of a single qdisc. So catching #a or #c is a challenge. Our policy, when said bugs are presented, is to "make it work" by modifying generally used data structures and code, but these come at the expense of adding special checks for corner cases which are nonsensical to begin with. The goal of this patchset is to create an equivalent to PCI quirks, which will catch nonsensical hierarchies in #a and #c and reject such a config. With that in mind, we are proposing the addition of a new qdisc op (quirk_chk). We introduce, as a first example, the quirk_chk op to netem. Its purpose here is to validate whether the user is attempting to add 2 netem duplicates in the same qdisc tree which will be forbidden unless the root qdisc is multiqueue. Here is an example that should now work: DEV="eth0" NUM_QUEUES=4 DUPLICATE_PERCENT="5%" tc qdisc del dev $DEV root > /dev/null 2>&1 tc qdisc add dev $DEV root handle 1: mq for i in $(seq 1 $NUM_QUEUES); do HANDLE_ID=$((i * 10)) PARENT_ID="1:$i" tc qdisc add dev $DEV parent $PARENT_ID handle \ ${HANDLE_ID}: netem duplicate $DUPLICATE_PERCENT done Suggested-by: Jamal Hadi Salim Signed-off-by: Victor Nogueira --- v1 -> v2: - Simplify process of getting root qdisc in netem_quirk_chk - Use parent's major directly instead of looking up parent qdisc in netem_quirk_chk - Call parse_attrs in netem_quirk_chk to avoid issue caught by syzbot Link to v1: https://lore.kernel.org/netdev/20251124223749.503979-1-victor@mojatatu.com/ --- include/net/sch_generic.h | 3 +++ net/sched/sch_api.c | 12 ++++++++++++ net/sched/sch_netem.c | 40 +++++++++++++++++++++++++++------------ 3 files changed, 43 insertions(+), 12 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 94966692ccdf..60450372c5d5 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -313,6 +313,9 @@ struct Qdisc_ops { u32 block_index); void (*egress_block_set)(struct Qdisc *sch, u32 block_index); + int (*quirk_chk)(struct Qdisc *sch, + struct nlattr *arg, + struct netlink_ext_ack *extack); u32 (*ingress_block_get)(struct Qdisc *sch); u32 (*egress_block_get)(struct Qdisc *sch); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index f56b18c8aebf..a850df437691 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1315,6 +1315,12 @@ static struct Qdisc *qdisc_create(struct net_device *dev, rcu_assign_pointer(sch->stab, stab); } + if (ops->quirk_chk) { + err = ops->quirk_chk(sch, tca[TCA_OPTIONS], extack); + if (err != 0) + goto err_out3; + } + if (ops->init) { err = ops->init(sch, tca[TCA_OPTIONS], extack); if (err != 0) @@ -1378,6 +1384,12 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca, NL_SET_ERR_MSG(extack, "Change of blocks is not supported"); return -EOPNOTSUPP; } + if (sch->ops->quirk_chk) { + err = sch->ops->quirk_chk(sch, tca[TCA_OPTIONS], + extack); + if (err != 0) + return err; + } err = sch->ops->change(sch, tca[TCA_OPTIONS], extack); if (err) return err; diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index eafc316ae319..ceece2ae37bc 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -975,13 +975,27 @@ static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, static const struct Qdisc_class_ops netem_class_ops; -static int check_netem_in_tree(struct Qdisc *sch, bool duplicates, - struct netlink_ext_ack *extack) +static int netem_quirk_chk(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { + struct nlattr *tb[TCA_NETEM_MAX + 1]; + struct tc_netem_qopt *qopt; struct Qdisc *root, *q; + struct net_device *dev; + bool root_is_mq; + bool duplicates; unsigned int i; + int ret; + + ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt)); + if (ret < 0) + return ret; - root = qdisc_root_sleeping(sch); + qopt = nla_data(opt); + duplicates = qopt->duplicate; + + dev = sch->dev_queue->dev; + root = rtnl_dereference(dev->qdisc); if (sch != root && root->ops->cl_ops == &netem_class_ops) { if (duplicates || @@ -992,19 +1006,25 @@ static int check_netem_in_tree(struct Qdisc *sch, bool duplicates, if (!qdisc_dev(root)) return 0; + root_is_mq = root->flags & TCQ_F_MQROOT; + hash_for_each(qdisc_dev(root)->qdisc_hash, i, q, hash) { if (sch != q && q->ops->cl_ops == &netem_class_ops) { if (duplicates || - ((struct netem_sched_data *)qdisc_priv(q))->duplicate) - goto err; + ((struct netem_sched_data *)qdisc_priv(q))->duplicate) { + if (!root_is_mq || + TC_H_MAJ(q->parent) != root->handle || + TC_H_MAJ(q->parent) != TC_H_MAJ(sch->parent)) + goto err; + } } } return 0; err: - NL_SET_ERR_MSG(extack, - "netem: cannot mix duplicating netems with other netems in tree"); + NL_SET_ERR_MSG_MOD(extack, + "cannot mix duplicating netems with other netems in tree unless root is multiqueue"); return -EINVAL; } @@ -1066,11 +1086,6 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt, q->gap = qopt->gap; q->counter = 0; q->loss = qopt->loss; - - ret = check_netem_in_tree(sch, qopt->duplicate, extack); - if (ret) - goto unlock; - q->duplicate = qopt->duplicate; /* for compatibility with earlier versions. @@ -1352,6 +1367,7 @@ static struct Qdisc_ops netem_qdisc_ops __read_mostly = { .destroy = netem_destroy, .change = netem_change, .dump = netem_dump, + .quirk_chk = netem_quirk_chk, .owner = THIS_MODULE, }; MODULE_ALIAS_NET_SCH("netem"); -- 2.51.0