The tdc test case 3719 ("Replace gate base-time action") fails with: "Error: Empty schedule entry list. We have an error talking to the kernel". The test runs a 'replace' command while only specifying 'base-time' and omitting the mandatory schedule entries. The act_gate action (and tc userspace) requires a schedule to be present; replace is atomic and does not support partial updates or inheriting the previous schedule. Update the test case to include a valid 'sched-entry' list in the replace command so it matches the kernel's expected behavior. Fixes: 4a1db5251cfac ("selftests/tc-testings: add selftests for gate action") Signed-off-by: Paul Moses --- tools/testing/selftests/tc-testing/tc-tests/actions/gate.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/gate.json b/tools/testing/selftests/tc-testing/tc-tests/actions/gate.json index db645c22ad7be..67e406e4eba33 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/gate.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/gate.json @@ -131,7 +131,7 @@ 255 ] ], - "cmdUnderTest": "$TC action replace action gate base-time 400000000000ns index 20", + "cmdUnderTest": "$TC action replace action gate base-time 400000000000ns sched-entry open 200000000ns -1 8000000b index 20", "expExitCode": "0", "verifyCmd": "$TC action get action gate index 20", "matchPattern": "action order [0-9]*: .*base-time 400s.*index 20 ref", -- 2.52.GIT Switch act_gate parameters to an RCU-protected pointer and apply schedule changes using a prepare-then-swap pattern. This avoids races between the timer/data paths and configuration updates, and cancels the hrtimer before swapping schedules. A gate action replace could free and swap schedules while the hrtimer callback or data path still dereferences the old entries, leaving a use-after-free window during updates. The deferred swap and RCU free close that window. A reproducer is available on request. 
Also clear params on early error for newly created actions to avoid leaving a dangling reference, and reject overflowing cycle times. Fixes: a51c328df310 ("net: qos: introduce a gate control flow action") Cc: stable@vger.kernel.org Signed-off-by: Paul Moses --- include/net/tc_act/tc_gate.h | 43 +++++- net/sched/act_gate.c | 276 +++++++++++++++++++++++++++-------- 2 files changed, 248 insertions(+), 71 deletions(-) diff --git a/include/net/tc_act/tc_gate.h b/include/net/tc_act/tc_gate.h index c1a67149c6b62..5fa6a500b9288 100644 --- a/include/net/tc_act/tc_gate.h +++ b/include/net/tc_act/tc_gate.h @@ -32,6 +32,7 @@ struct tcf_gate_params { s32 tcfg_clockid; size_t num_entries; struct list_head entries; + struct rcu_head rcu; }; #define GATE_ACT_GATE_OPEN BIT(0) @@ -39,7 +40,7 @@ struct tcf_gate_params { struct tcf_gate { struct tc_action common; - struct tcf_gate_params param; + struct tcf_gate_params __rcu *param; u8 current_gate_status; ktime_t current_close_time; u32 current_entry_octets; @@ -53,45 +54,70 @@ struct tcf_gate { static inline s32 tcf_gate_prio(const struct tc_action *a) { + struct tcf_gate *gact = to_gate(a); + struct tcf_gate_params *p; s32 tcfg_prio; - tcfg_prio = to_gate(a)->param.tcfg_priority; + p = rcu_dereference_protected(gact->param, + lockdep_is_held(&a->tcfa_lock) || + lockdep_rtnl_is_held()); + tcfg_prio = p->tcfg_priority; return tcfg_prio; } static inline u64 tcf_gate_basetime(const struct tc_action *a) { + struct tcf_gate *gact = to_gate(a); + struct tcf_gate_params *p; u64 tcfg_basetime; - tcfg_basetime = to_gate(a)->param.tcfg_basetime; + p = rcu_dereference_protected(gact->param, + lockdep_is_held(&a->tcfa_lock) || + lockdep_rtnl_is_held()); + tcfg_basetime = p->tcfg_basetime; return tcfg_basetime; } static inline u64 tcf_gate_cycletime(const struct tc_action *a) { + struct tcf_gate *gact = to_gate(a); + struct tcf_gate_params *p; u64 tcfg_cycletime; - tcfg_cycletime = to_gate(a)->param.tcfg_cycletime; + p = 
rcu_dereference_protected(gact->param, + lockdep_is_held(&a->tcfa_lock) || + lockdep_rtnl_is_held()); + tcfg_cycletime = p->tcfg_cycletime; return tcfg_cycletime; } static inline u64 tcf_gate_cycletimeext(const struct tc_action *a) { + struct tcf_gate *gact = to_gate(a); + struct tcf_gate_params *p; u64 tcfg_cycletimeext; - tcfg_cycletimeext = to_gate(a)->param.tcfg_cycletime_ext; + p = rcu_dereference_protected(gact->param, + lockdep_is_held(&a->tcfa_lock) || + lockdep_rtnl_is_held()); + tcfg_cycletimeext = p->tcfg_cycletime_ext; return tcfg_cycletimeext; } static inline u32 tcf_gate_num_entries(const struct tc_action *a) { + struct tcf_gate *gact = to_gate(a); + struct tcf_gate_params *p; u32 num_entries; - num_entries = to_gate(a)->param.num_entries; + p = rcu_dereference_protected(gact->param, + lockdep_is_held(&a->tcfa_lock) || + lockdep_rtnl_is_held()); + num_entries = p->num_entries; return num_entries; } @@ -100,12 +126,15 @@ static inline struct action_gate_entry *tcf_gate_get_list(const struct tc_action *a) { struct action_gate_entry *oe; + struct tcf_gate *gact = to_gate(a); struct tcf_gate_params *p; struct tcfg_gate_entry *entry; u32 num_entries; int i = 0; - p = &to_gate(a)->param; + p = rcu_dereference_protected(gact->param, + lockdep_is_held(&a->tcfa_lock) || + lockdep_rtnl_is_held()); num_entries = p->num_entries; list_for_each_entry(entry, &p->entries, list) diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c index c1f75f2727576..6934df233df5e 100644 --- a/net/sched/act_gate.c +++ b/net/sched/act_gate.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -32,9 +33,10 @@ static ktime_t gate_get_time(struct tcf_gate *gact) return KTIME_MAX; } -static void gate_get_start_time(struct tcf_gate *gact, ktime_t *start) +static void gate_get_start_time(struct tcf_gate *gact, + struct tcf_gate_params *param, + ktime_t *start) { - struct tcf_gate_params *param = &gact->param; ktime_t now, base, cycle; u64 n; @@ -69,12 
+71,14 @@ static enum hrtimer_restart gate_timer_func(struct hrtimer *timer) { struct tcf_gate *gact = container_of(timer, struct tcf_gate, hitimer); - struct tcf_gate_params *p = &gact->param; + struct tcf_gate_params *p; struct tcfg_gate_entry *next; ktime_t close_time, now; spin_lock(&gact->tcf_lock); + p = rcu_dereference_protected(gact->param, + lockdep_is_held(&gact->tcf_lock)); next = gact->next_entry; /* cycle start, clear pending bit, clear total octets */ @@ -225,6 +229,14 @@ static void release_entry_list(struct list_head *entries) } } +static void tcf_gate_params_release(struct rcu_head *rcu) +{ + struct tcf_gate_params *p = container_of(rcu, struct tcf_gate_params, rcu); + + release_entry_list(&p->entries); + kfree(p); +} + static int parse_gate_list(struct nlattr *list_attr, struct tcf_gate_params *sched, struct netlink_ext_ack *extack) @@ -274,42 +286,65 @@ static void gate_setup_timer(struct tcf_gate *gact, u64 basetime, enum tk_offsets tko, s32 clockid, bool do_init) { + struct tcf_gate_params *p; + if (!do_init) { - if (basetime == gact->param.tcfg_basetime && + p = rcu_dereference_protected(gact->param, + lockdep_is_held(&gact->tcf_lock)); + if (basetime == p->tcfg_basetime && tko == gact->tk_offset && - clockid == gact->param.tcfg_clockid) + clockid == p->tcfg_clockid) return; spin_unlock_bh(&gact->tcf_lock); hrtimer_cancel(&gact->hitimer); spin_lock_bh(&gact->tcf_lock); } - gact->param.tcfg_basetime = basetime; - gact->param.tcfg_clockid = clockid; gact->tk_offset = tko; hrtimer_setup(&gact->hitimer, gate_timer_func, clockid, HRTIMER_MODE_ABS_SOFT); } +static int gate_calc_cycletime(struct list_head *entries, u64 *cycletime) +{ + struct tcfg_gate_entry *entry; + u64 sum = 0; + + list_for_each_entry(entry, entries, list) { + if (check_add_overflow(sum, (u64)entry->interval, &sum)) + return -EOVERFLOW; + } + + if (!sum) + return -EINVAL; + + *cycletime = sum; + return 0; +} + static int tcf_gate_init(struct net *net, struct nlattr *nla, struct 
nlattr *est, struct tc_action **a, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, act_gate_ops.net_id); - enum tk_offsets tk_offset = TK_OFFS_TAI; - bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_GATE_MAX + 1]; struct tcf_chain *goto_ch = NULL; - u64 cycletime = 0, basetime = 0; - struct tcf_gate_params *p; - s32 clockid = CLOCK_TAI; + struct tcf_gate_params *p, *oldp; struct tcf_gate *gact; struct tc_gate *parm; - int ret = 0, err; - u32 gflags = 0; - s32 prio = -1; + struct tcf_gate_params newp = { }; ktime_t start; + u64 cycletime = 0, basetime = 0, cycletime_ext = 0; + enum tk_offsets tk_offset = TK_OFFS_TAI; + s32 clockid = CLOCK_TAI; + u32 gflags = 0; u32 index; + s32 prio = -1; + bool bind = flags & TCA_ACT_FLAGS_BIND; + bool clockid_set = false; + int ret = 0, err; + + INIT_LIST_HEAD(&newp.entries); if (!nla) return -EINVAL; @@ -323,6 +358,7 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, if (tb[TCA_GATE_CLOCKID]) { clockid = nla_get_s32(tb[TCA_GATE_CLOCKID]); + clockid_set = true; switch (clockid) { case CLOCK_REALTIME: tk_offset = TK_OFFS_REAL; @@ -349,9 +385,6 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, if (err < 0) return err; - if (err && bind) - return ACT_P_BOUND; - if (!err) { ret = tcf_idr_create_from_flags(tn, index, est, a, &act_gate_ops, bind, flags); @@ -361,94 +394,206 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, } ret = ACT_P_CREATED; - } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) { - tcf_idr_release(*a, bind); - return -EEXIST; + gact = to_gate(*a); + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (!p) { + tcf_idr_release(*a, bind); + return -ENOMEM; + } + INIT_LIST_HEAD(&p->entries); + rcu_assign_pointer(gact->param, p); + gate_setup_timer(gact, basetime, tk_offset, clockid, true); + } else { + if (bind) + return ACT_P_BOUND; + + if (!(flags & TCA_ACT_FLAGS_REPLACE)) { + tcf_idr_release(*a, bind); + return 
-EEXIST; + } + gact = to_gate(*a); } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; + + oldp = rcu_dereference_protected(gact->param, + lockdep_is_held(&gact->common.tcfa_lock)); + if (tb[TCA_GATE_PRIORITY]) prio = nla_get_s32(tb[TCA_GATE_PRIORITY]); + else if (ret != ACT_P_CREATED) + prio = oldp->tcfg_priority; if (tb[TCA_GATE_BASE_TIME]) basetime = nla_get_u64(tb[TCA_GATE_BASE_TIME]); + else if (ret != ACT_P_CREATED) + basetime = oldp->tcfg_basetime; if (tb[TCA_GATE_FLAGS]) gflags = nla_get_u32(tb[TCA_GATE_FLAGS]); + else if (ret != ACT_P_CREATED) + gflags = oldp->tcfg_flags; + + if (!clockid_set) { + if (ret != ACT_P_CREATED) + clockid = oldp->tcfg_clockid; + else + clockid = CLOCK_TAI; + switch (clockid) { + case CLOCK_REALTIME: + tk_offset = TK_OFFS_REAL; + break; + case CLOCK_MONOTONIC: + tk_offset = TK_OFFS_MAX; + break; + case CLOCK_BOOTTIME: + tk_offset = TK_OFFS_BOOT; + break; + case CLOCK_TAI: + tk_offset = TK_OFFS_TAI; + break; + default: + NL_SET_ERR_MSG(extack, "Invalid 'clockid'"); + err = -EINVAL; + goto put_chain; + } + } - gact = to_gate(*a); - if (ret == ACT_P_CREATED) - INIT_LIST_HEAD(&gact->param.entries); - - err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); - if (err < 0) - goto release_idr; + if (ret != ACT_P_CREATED && clockid_set && + clockid != oldp->tcfg_clockid) { + NL_SET_ERR_MSG(extack, "Clockid change is not supported"); + err = -EINVAL; + goto put_chain; + } - spin_lock_bh(&gact->tcf_lock); - p = &gact->param; + if (tb[TCA_GATE_ENTRY_LIST]) { + INIT_LIST_HEAD(&newp.entries); + err = parse_gate_list(tb[TCA_GATE_ENTRY_LIST], &newp, extack); + if (err <= 0) { + if (!err) + NL_SET_ERR_MSG(extack, + "Missing gate schedule (entry list)"); + err = -EINVAL; + goto put_chain; + } + newp.num_entries = err; + } else if (ret == ACT_P_CREATED) { + NL_SET_ERR_MSG(extack, "Missing schedule entry list"); + err = -EINVAL; + goto put_chain; + } if (tb[TCA_GATE_CYCLE_TIME]) 
cycletime = nla_get_u64(tb[TCA_GATE_CYCLE_TIME]); - if (tb[TCA_GATE_ENTRY_LIST]) { - err = parse_gate_list(tb[TCA_GATE_ENTRY_LIST], p, extack); - if (err < 0) - goto chain_put; - } + if (tb[TCA_GATE_CYCLE_TIME_EXT]) + cycletime_ext = nla_get_u64(tb[TCA_GATE_CYCLE_TIME_EXT]); + else if (ret != ACT_P_CREATED) + cycletime_ext = oldp->tcfg_cycletime_ext; if (!cycletime) { - struct tcfg_gate_entry *entry; - ktime_t cycle = 0; + struct list_head *entries; - list_for_each_entry(entry, &p->entries, list) - cycle = ktime_add_ns(cycle, entry->interval); - cycletime = cycle; - if (!cycletime) { + if (!list_empty(&newp.entries)) + entries = &newp.entries; + else if (ret != ACT_P_CREATED) + entries = &oldp->entries; + else + entries = NULL; + + if (!entries) { + NL_SET_ERR_MSG(extack, "Invalid cycle time"); err = -EINVAL; - goto chain_put; + goto release_new_entries; + } + + err = gate_calc_cycletime(entries, &cycletime); + if (err < 0) { + NL_SET_ERR_MSG(extack, "Invalid cycle time"); + goto release_new_entries; } } - p->tcfg_cycletime = cycletime; - if (tb[TCA_GATE_CYCLE_TIME_EXT]) - p->tcfg_cycletime_ext = - nla_get_u64(tb[TCA_GATE_CYCLE_TIME_EXT]); + if (ret != ACT_P_CREATED) + hrtimer_cancel(&gact->hitimer); + + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (!p) { + err = -ENOMEM; + goto release_new_entries; + } - gate_setup_timer(gact, basetime, tk_offset, clockid, - ret == ACT_P_CREATED); + INIT_LIST_HEAD(&p->entries); p->tcfg_priority = prio; + p->tcfg_basetime = basetime; + p->tcfg_cycletime = cycletime; + p->tcfg_cycletime_ext = cycletime_ext; p->tcfg_flags = gflags; - gate_get_start_time(gact, &start); + p->tcfg_clockid = clockid; + + if (!list_empty(&newp.entries)) { + list_splice_init(&newp.entries, &p->entries); + p->num_entries = newp.num_entries; + } else if (ret != ACT_P_CREATED) { + struct tcfg_gate_entry *entry, *ne; + + list_for_each_entry(entry, &oldp->entries, list) { + ne = kmemdup(entry, sizeof(*ne), GFP_KERNEL); + if (!ne) { + err = -ENOMEM; + goto free_p; 
+ } + INIT_LIST_HEAD(&ne->list); + list_add_tail(&ne->list, &p->entries); + } + p->num_entries = oldp->num_entries; + } - gact->current_close_time = start; - gact->current_gate_status = GATE_ACT_GATE_OPEN | GATE_ACT_PENDING; + spin_lock_bh(&gact->tcf_lock); + gate_setup_timer(gact, basetime, tk_offset, clockid, ret == ACT_P_CREATED); + gate_get_start_time(gact, p, &start); + gact->current_close_time = start; gact->next_entry = list_first_entry(&p->entries, struct tcfg_gate_entry, list); + gact->current_entry_octets = 0; + gact->current_gate_status = GATE_ACT_PENDING; goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); gate_start_timer(gact, start); + oldp = rcu_replace_pointer(gact->param, p, + lockdep_is_held(&gact->tcf_lock)); + spin_unlock_bh(&gact->tcf_lock); + if (oldp) + call_rcu(&oldp->rcu, tcf_gate_params_release); + if (goto_ch) tcf_chain_put_by_act(goto_ch); return ret; -chain_put: - spin_unlock_bh(&gact->tcf_lock); - +free_p: + kfree(p); +release_new_entries: + release_entry_list(&newp.entries); +put_chain: if (goto_ch) tcf_chain_put_by_act(goto_ch); release_idr: - /* action is not inserted in any list: it's safe to init hitimer - * without taking tcf_lock. 
- */ - if (ret == ACT_P_CREATED) - gate_setup_timer(gact, gact->param.tcfg_basetime, - gact->tk_offset, gact->param.tcfg_clockid, - true); + if (ret == ACT_P_CREATED) { + p = rcu_dereference_protected(gact->param, 1); + if (p) { + release_entry_list(&p->entries); + kfree(p); + rcu_assign_pointer(gact->param, NULL); + } + } tcf_idr_release(*a, bind); return err; } @@ -458,9 +603,11 @@ static void tcf_gate_cleanup(struct tc_action *a) struct tcf_gate *gact = to_gate(a); struct tcf_gate_params *p; - p = &gact->param; hrtimer_cancel(&gact->hitimer); - release_entry_list(&p->entries); + + p = rcu_dereference_protected(gact->param, 1); + if (p) + call_rcu(&p->rcu, tcf_gate_params_release); } static int dumping_entry(struct sk_buff *skb, @@ -512,7 +659,8 @@ static int tcf_gate_dump(struct sk_buff *skb, struct tc_action *a, spin_lock_bh(&gact->tcf_lock); opt.action = gact->tcf_action; - p = &gact->param; + p = rcu_dereference_protected(gact->param, + lockdep_is_held(&gact->tcf_lock)); if (nla_put(skb, TCA_GATE_PARMS, sizeof(opt), &opt)) goto nla_put_failure; -- 2.52.GIT Zero-initialize the tc_gate dump struct to avoid leaking padding bytes to userspace. Without clearing the struct, uninitialized stack padding can be copied into the netlink reply during action dumps. 
Fixes: a51c328df310 ("net: qos: introduce a gate control flow action") Cc: stable@vger.kernel.org Signed-off-by: Paul Moses --- net/sched/act_gate.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c index 6934df233df5e..043ad856361d7 100644 --- a/net/sched/act_gate.c +++ b/net/sched/act_gate.c @@ -644,19 +644,18 @@ static int dumping_entry(struct sk_buff *skb, static int tcf_gate_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { - unsigned char *b = skb_tail_pointer(skb); struct tcf_gate *gact = to_gate(a); - struct tc_gate opt = { - .index = gact->tcf_index, - .refcnt = refcount_read(&gact->tcf_refcnt) - ref, - .bindcnt = atomic_read(&gact->tcf_bindcnt) - bind, - }; struct tcfg_gate_entry *entry; struct tcf_gate_params *p; struct nlattr *entry_list; + struct tc_gate opt = { }; struct tcf_t t; + unsigned char *b = skb_tail_pointer(skb); spin_lock_bh(&gact->tcf_lock); + opt.index = gact->tcf_index; + opt.refcnt = refcount_read(&gact->tcf_refcnt) - ref; + opt.bindcnt = atomic_read(&gact->tcf_bindcnt) - bind; opt.action = gact->tcf_action; p = rcu_dereference_protected(gact->param, -- 2.52.GIT