The tdc test case 3719 ("Replace gate base-time action") fails with:
  Error: Empty schedule entry list.
  We have an error talking to the kernel
The test runs a 'replace' command while only specifying 'base-time' and
omitting the mandatory schedule entries. The act_gate action (and tc
userspace) requires a schedule to be present; replace is atomic and does
not support partial updates or inheriting the previous schedule.
Update the test case to include a valid 'sched-entry' list in the
replace command so it matches the kernel's expected behavior.
Fixes: 4a1db5251cfac ("selftests/tc-testings: add selftests for gate action")
Signed-off-by: Paul Moses
---
tools/testing/selftests/tc-testing/tc-tests/actions/gate.json | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/gate.json b/tools/testing/selftests/tc-testing/tc-tests/actions/gate.json
index db645c22ad7be..67e406e4eba33 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/gate.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/gate.json
@@ -131,7 +131,7 @@
255
]
],
- "cmdUnderTest": "$TC action replace action gate base-time 400000000000ns index 20",
+ "cmdUnderTest": "$TC action replace action gate base-time 400000000000ns sched-entry open 200000000ns -1 8000000b index 20",
"expExitCode": "0",
"verifyCmd": "$TC action get action gate index 20",
"matchPattern": "action order [0-9]*: .*base-time 400s.*index 20 ref",
--
2.52.GIT
Switch act_gate parameters to an RCU-protected pointer and apply schedule
changes using a prepare-then-swap pattern. The hrtimer is cancelled before
the schedules are swapped, which avoids races between the timer/data paths
and configuration updates.
A gate action replace could free and swap schedules while the hrtimer
callback or data path still dereferences the old entries, leaving a
use-after-free window during updates. The deferred swap and RCU free
close that window. A reproducer is available on request.
Also clear params on early error for newly created actions to avoid
leaving a dangling reference, and reject overflowing cycle times.
Fixes: a51c328df310 ("net: qos: introduce a gate control flow action")
Cc: stable@vger.kernel.org
Signed-off-by: Paul Moses
---
include/net/tc_act/tc_gate.h | 43 +++++-
net/sched/act_gate.c | 276 +++++++++++++++++++++++++++--------
2 files changed, 248 insertions(+), 71 deletions(-)
diff --git a/include/net/tc_act/tc_gate.h b/include/net/tc_act/tc_gate.h
index c1a67149c6b62..5fa6a500b9288 100644
--- a/include/net/tc_act/tc_gate.h
+++ b/include/net/tc_act/tc_gate.h
@@ -32,6 +32,7 @@ struct tcf_gate_params {
s32 tcfg_clockid;
size_t num_entries;
struct list_head entries;
+ struct rcu_head rcu;
};
#define GATE_ACT_GATE_OPEN BIT(0)
@@ -39,7 +40,7 @@ struct tcf_gate_params {
struct tcf_gate {
struct tc_action common;
- struct tcf_gate_params param;
+ struct tcf_gate_params __rcu *param;
u8 current_gate_status;
ktime_t current_close_time;
u32 current_entry_octets;
@@ -53,45 +54,70 @@ struct tcf_gate {
static inline s32 tcf_gate_prio(const struct tc_action *a)
{
+ struct tcf_gate *gact = to_gate(a);
+ struct tcf_gate_params *p;
s32 tcfg_prio;
- tcfg_prio = to_gate(a)->param.tcfg_priority;
+ p = rcu_dereference_protected(gact->param,
+ lockdep_is_held(&a->tcfa_lock) ||
+ lockdep_rtnl_is_held());
+ tcfg_prio = p->tcfg_priority;
return tcfg_prio;
}
static inline u64 tcf_gate_basetime(const struct tc_action *a)
{
+ struct tcf_gate *gact = to_gate(a);
+ struct tcf_gate_params *p;
u64 tcfg_basetime;
- tcfg_basetime = to_gate(a)->param.tcfg_basetime;
+ p = rcu_dereference_protected(gact->param,
+ lockdep_is_held(&a->tcfa_lock) ||
+ lockdep_rtnl_is_held());
+ tcfg_basetime = p->tcfg_basetime;
return tcfg_basetime;
}
static inline u64 tcf_gate_cycletime(const struct tc_action *a)
{
+ struct tcf_gate *gact = to_gate(a);
+ struct tcf_gate_params *p;
u64 tcfg_cycletime;
- tcfg_cycletime = to_gate(a)->param.tcfg_cycletime;
+ p = rcu_dereference_protected(gact->param,
+ lockdep_is_held(&a->tcfa_lock) ||
+ lockdep_rtnl_is_held());
+ tcfg_cycletime = p->tcfg_cycletime;
return tcfg_cycletime;
}
static inline u64 tcf_gate_cycletimeext(const struct tc_action *a)
{
+ struct tcf_gate *gact = to_gate(a);
+ struct tcf_gate_params *p;
u64 tcfg_cycletimeext;
- tcfg_cycletimeext = to_gate(a)->param.tcfg_cycletime_ext;
+ p = rcu_dereference_protected(gact->param,
+ lockdep_is_held(&a->tcfa_lock) ||
+ lockdep_rtnl_is_held());
+ tcfg_cycletimeext = p->tcfg_cycletime_ext;
return tcfg_cycletimeext;
}
static inline u32 tcf_gate_num_entries(const struct tc_action *a)
{
+ struct tcf_gate *gact = to_gate(a);
+ struct tcf_gate_params *p;
u32 num_entries;
- num_entries = to_gate(a)->param.num_entries;
+ p = rcu_dereference_protected(gact->param,
+ lockdep_is_held(&a->tcfa_lock) ||
+ lockdep_rtnl_is_held());
+ num_entries = p->num_entries;
return num_entries;
}
@@ -100,12 +126,15 @@ static inline struct action_gate_entry
*tcf_gate_get_list(const struct tc_action *a)
{
struct action_gate_entry *oe;
+ struct tcf_gate *gact = to_gate(a);
struct tcf_gate_params *p;
struct tcfg_gate_entry *entry;
u32 num_entries;
int i = 0;
- p = &to_gate(a)->param;
+ p = rcu_dereference_protected(gact->param,
+ lockdep_is_held(&a->tcfa_lock) ||
+ lockdep_rtnl_is_held());
num_entries = p->num_entries;
list_for_each_entry(entry, &p->entries, list)
diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c
index c1f75f2727576..6934df233df5e 100644
--- a/net/sched/act_gate.c
+++ b/net/sched/act_gate.c
@@ -6,6 +6,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -32,9 +33,10 @@ static ktime_t gate_get_time(struct tcf_gate *gact)
return KTIME_MAX;
}
-static void gate_get_start_time(struct tcf_gate *gact, ktime_t *start)
+static void gate_get_start_time(struct tcf_gate *gact,
+ struct tcf_gate_params *param,
+ ktime_t *start)
{
- struct tcf_gate_params *param = &gact->param;
ktime_t now, base, cycle;
u64 n;
@@ -69,12 +71,14 @@ static enum hrtimer_restart gate_timer_func(struct hrtimer *timer)
{
struct tcf_gate *gact = container_of(timer, struct tcf_gate,
hitimer);
- struct tcf_gate_params *p = &gact->param;
+ struct tcf_gate_params *p;
struct tcfg_gate_entry *next;
ktime_t close_time, now;
spin_lock(&gact->tcf_lock);
+ p = rcu_dereference_protected(gact->param,
+ lockdep_is_held(&gact->tcf_lock));
next = gact->next_entry;
/* cycle start, clear pending bit, clear total octets */
@@ -225,6 +229,14 @@ static void release_entry_list(struct list_head *entries)
}
}
+static void tcf_gate_params_release(struct rcu_head *rcu)
+{
+ struct tcf_gate_params *p = container_of(rcu, struct tcf_gate_params, rcu);
+
+ release_entry_list(&p->entries);
+ kfree(p);
+}
+
static int parse_gate_list(struct nlattr *list_attr,
struct tcf_gate_params *sched,
struct netlink_ext_ack *extack)
@@ -274,42 +286,65 @@ static void gate_setup_timer(struct tcf_gate *gact, u64 basetime,
enum tk_offsets tko, s32 clockid,
bool do_init)
{
+ struct tcf_gate_params *p;
+
if (!do_init) {
- if (basetime == gact->param.tcfg_basetime &&
+ p = rcu_dereference_protected(gact->param,
+ lockdep_is_held(&gact->tcf_lock));
+ if (basetime == p->tcfg_basetime &&
tko == gact->tk_offset &&
- clockid == gact->param.tcfg_clockid)
+ clockid == p->tcfg_clockid)
return;
spin_unlock_bh(&gact->tcf_lock);
hrtimer_cancel(&gact->hitimer);
spin_lock_bh(&gact->tcf_lock);
}
- gact->param.tcfg_basetime = basetime;
- gact->param.tcfg_clockid = clockid;
gact->tk_offset = tko;
hrtimer_setup(&gact->hitimer, gate_timer_func, clockid, HRTIMER_MODE_ABS_SOFT);
}
+static int gate_calc_cycletime(struct list_head *entries, u64 *cycletime)
+{
+ struct tcfg_gate_entry *entry;
+ u64 sum = 0;
+
+ list_for_each_entry(entry, entries, list) {
+ if (check_add_overflow(sum, (u64)entry->interval, &sum))
+ return -EOVERFLOW;
+ }
+
+ if (!sum)
+ return -EINVAL;
+
+ *cycletime = sum;
+ return 0;
+}
+
static int tcf_gate_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, act_gate_ops.net_id);
- enum tk_offsets tk_offset = TK_OFFS_TAI;
- bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_GATE_MAX + 1];
struct tcf_chain *goto_ch = NULL;
- u64 cycletime = 0, basetime = 0;
- struct tcf_gate_params *p;
- s32 clockid = CLOCK_TAI;
+ struct tcf_gate_params *p, *oldp;
struct tcf_gate *gact;
struct tc_gate *parm;
- int ret = 0, err;
- u32 gflags = 0;
- s32 prio = -1;
+ struct tcf_gate_params newp = { };
ktime_t start;
+ u64 cycletime = 0, basetime = 0, cycletime_ext = 0;
+ enum tk_offsets tk_offset = TK_OFFS_TAI;
+ s32 clockid = CLOCK_TAI;
+ u32 gflags = 0;
u32 index;
+ s32 prio = -1;
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
+ bool clockid_set = false;
+ int ret = 0, err;
+
+ INIT_LIST_HEAD(&newp.entries);
if (!nla)
return -EINVAL;
@@ -323,6 +358,7 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
if (tb[TCA_GATE_CLOCKID]) {
clockid = nla_get_s32(tb[TCA_GATE_CLOCKID]);
+ clockid_set = true;
switch (clockid) {
case CLOCK_REALTIME:
tk_offset = TK_OFFS_REAL;
@@ -349,9 +385,6 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
if (err < 0)
return err;
- if (err && bind)
- return ACT_P_BOUND;
-
if (!err) {
ret = tcf_idr_create_from_flags(tn, index, est, a,
&act_gate_ops, bind, flags);
@@ -361,94 +394,206 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
}
ret = ACT_P_CREATED;
- } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
- tcf_idr_release(*a, bind);
- return -EEXIST;
+ gact = to_gate(*a);
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
+ if (!p) {
+ tcf_idr_release(*a, bind);
+ return -ENOMEM;
+ }
+ INIT_LIST_HEAD(&p->entries);
+ rcu_assign_pointer(gact->param, p);
+ gate_setup_timer(gact, basetime, tk_offset, clockid, true);
+ } else {
+ if (bind)
+ return ACT_P_BOUND;
+
+ if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
+ tcf_idr_release(*a, bind);
+ return -EEXIST;
+ }
+ gact = to_gate(*a);
}
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0)
+ goto release_idr;
+
+ oldp = rcu_dereference_protected(gact->param,
+ lockdep_is_held(&gact->common.tcfa_lock));
+
if (tb[TCA_GATE_PRIORITY])
prio = nla_get_s32(tb[TCA_GATE_PRIORITY]);
+ else if (ret != ACT_P_CREATED)
+ prio = oldp->tcfg_priority;
if (tb[TCA_GATE_BASE_TIME])
basetime = nla_get_u64(tb[TCA_GATE_BASE_TIME]);
+ else if (ret != ACT_P_CREATED)
+ basetime = oldp->tcfg_basetime;
if (tb[TCA_GATE_FLAGS])
gflags = nla_get_u32(tb[TCA_GATE_FLAGS]);
+ else if (ret != ACT_P_CREATED)
+ gflags = oldp->tcfg_flags;
+
+ if (!clockid_set) {
+ if (ret != ACT_P_CREATED)
+ clockid = oldp->tcfg_clockid;
+ else
+ clockid = CLOCK_TAI;
+ switch (clockid) {
+ case CLOCK_REALTIME:
+ tk_offset = TK_OFFS_REAL;
+ break;
+ case CLOCK_MONOTONIC:
+ tk_offset = TK_OFFS_MAX;
+ break;
+ case CLOCK_BOOTTIME:
+ tk_offset = TK_OFFS_BOOT;
+ break;
+ case CLOCK_TAI:
+ tk_offset = TK_OFFS_TAI;
+ break;
+ default:
+ NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
+ err = -EINVAL;
+ goto put_chain;
+ }
+ }
- gact = to_gate(*a);
- if (ret == ACT_P_CREATED)
- INIT_LIST_HEAD(&gact->param.entries);
-
- err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
- if (err < 0)
- goto release_idr;
+ if (ret != ACT_P_CREATED && clockid_set &&
+ clockid != oldp->tcfg_clockid) {
+ NL_SET_ERR_MSG(extack, "Clockid change is not supported");
+ err = -EINVAL;
+ goto put_chain;
+ }
- spin_lock_bh(&gact->tcf_lock);
- p = &gact->param;
+ if (tb[TCA_GATE_ENTRY_LIST]) {
+ INIT_LIST_HEAD(&newp.entries);
+ err = parse_gate_list(tb[TCA_GATE_ENTRY_LIST], &newp, extack);
+ if (err <= 0) {
+ if (!err)
+ NL_SET_ERR_MSG(extack,
+ "Missing gate schedule (entry list)");
+ err = -EINVAL;
+ goto put_chain;
+ }
+ newp.num_entries = err;
+ } else if (ret == ACT_P_CREATED) {
+ NL_SET_ERR_MSG(extack, "Missing schedule entry list");
+ err = -EINVAL;
+ goto put_chain;
+ }
if (tb[TCA_GATE_CYCLE_TIME])
cycletime = nla_get_u64(tb[TCA_GATE_CYCLE_TIME]);
- if (tb[TCA_GATE_ENTRY_LIST]) {
- err = parse_gate_list(tb[TCA_GATE_ENTRY_LIST], p, extack);
- if (err < 0)
- goto chain_put;
- }
+ if (tb[TCA_GATE_CYCLE_TIME_EXT])
+ cycletime_ext = nla_get_u64(tb[TCA_GATE_CYCLE_TIME_EXT]);
+ else if (ret != ACT_P_CREATED)
+ cycletime_ext = oldp->tcfg_cycletime_ext;
if (!cycletime) {
- struct tcfg_gate_entry *entry;
- ktime_t cycle = 0;
+ struct list_head *entries;
- list_for_each_entry(entry, &p->entries, list)
- cycle = ktime_add_ns(cycle, entry->interval);
- cycletime = cycle;
- if (!cycletime) {
+ if (!list_empty(&newp.entries))
+ entries = &newp.entries;
+ else if (ret != ACT_P_CREATED)
+ entries = &oldp->entries;
+ else
+ entries = NULL;
+
+ if (!entries) {
+ NL_SET_ERR_MSG(extack, "Invalid cycle time");
err = -EINVAL;
- goto chain_put;
+ goto release_new_entries;
+ }
+
+ err = gate_calc_cycletime(entries, &cycletime);
+ if (err < 0) {
+ NL_SET_ERR_MSG(extack, "Invalid cycle time");
+ goto release_new_entries;
}
}
- p->tcfg_cycletime = cycletime;
- if (tb[TCA_GATE_CYCLE_TIME_EXT])
- p->tcfg_cycletime_ext =
- nla_get_u64(tb[TCA_GATE_CYCLE_TIME_EXT]);
+ if (ret != ACT_P_CREATED)
+ hrtimer_cancel(&gact->hitimer);
+
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
+ if (!p) {
+ err = -ENOMEM;
+ goto release_new_entries;
+ }
- gate_setup_timer(gact, basetime, tk_offset, clockid,
- ret == ACT_P_CREATED);
+ INIT_LIST_HEAD(&p->entries);
p->tcfg_priority = prio;
+ p->tcfg_basetime = basetime;
+ p->tcfg_cycletime = cycletime;
+ p->tcfg_cycletime_ext = cycletime_ext;
p->tcfg_flags = gflags;
- gate_get_start_time(gact, &start);
+ p->tcfg_clockid = clockid;
+
+ if (!list_empty(&newp.entries)) {
+ list_splice_init(&newp.entries, &p->entries);
+ p->num_entries = newp.num_entries;
+ } else if (ret != ACT_P_CREATED) {
+ struct tcfg_gate_entry *entry, *ne;
+
+ list_for_each_entry(entry, &oldp->entries, list) {
+ ne = kmemdup(entry, sizeof(*ne), GFP_KERNEL);
+ if (!ne) {
+ err = -ENOMEM;
+ goto free_p;
+ }
+ INIT_LIST_HEAD(&ne->list);
+ list_add_tail(&ne->list, &p->entries);
+ }
+ p->num_entries = oldp->num_entries;
+ }
- gact->current_close_time = start;
- gact->current_gate_status = GATE_ACT_GATE_OPEN | GATE_ACT_PENDING;
+ spin_lock_bh(&gact->tcf_lock);
+ gate_setup_timer(gact, basetime, tk_offset, clockid, ret == ACT_P_CREATED);
+ gate_get_start_time(gact, p, &start);
+ gact->current_close_time = start;
gact->next_entry = list_first_entry(&p->entries,
struct tcfg_gate_entry, list);
+ gact->current_entry_octets = 0;
+ gact->current_gate_status = GATE_ACT_PENDING;
goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
gate_start_timer(gact, start);
+ oldp = rcu_replace_pointer(gact->param, p,
+ lockdep_is_held(&gact->tcf_lock));
+
spin_unlock_bh(&gact->tcf_lock);
+ if (oldp)
+ call_rcu(&oldp->rcu, tcf_gate_params_release);
+
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
return ret;
-chain_put:
- spin_unlock_bh(&gact->tcf_lock);
-
+free_p:
+ kfree(p);
+release_new_entries:
+ release_entry_list(&newp.entries);
+put_chain:
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
release_idr:
- /* action is not inserted in any list: it's safe to init hitimer
- * without taking tcf_lock.
- */
- if (ret == ACT_P_CREATED)
- gate_setup_timer(gact, gact->param.tcfg_basetime,
- gact->tk_offset, gact->param.tcfg_clockid,
- true);
+ if (ret == ACT_P_CREATED) {
+ p = rcu_dereference_protected(gact->param, 1);
+ if (p) {
+ release_entry_list(&p->entries);
+ kfree(p);
+ rcu_assign_pointer(gact->param, NULL);
+ }
+ }
tcf_idr_release(*a, bind);
return err;
}
@@ -458,9 +603,11 @@ static void tcf_gate_cleanup(struct tc_action *a)
struct tcf_gate *gact = to_gate(a);
struct tcf_gate_params *p;
- p = &gact->param;
hrtimer_cancel(&gact->hitimer);
- release_entry_list(&p->entries);
+
+ p = rcu_dereference_protected(gact->param, 1);
+ if (p)
+ call_rcu(&p->rcu, tcf_gate_params_release);
}
static int dumping_entry(struct sk_buff *skb,
@@ -512,7 +659,8 @@ static int tcf_gate_dump(struct sk_buff *skb, struct tc_action *a,
spin_lock_bh(&gact->tcf_lock);
opt.action = gact->tcf_action;
- p = &gact->param;
+ p = rcu_dereference_protected(gact->param,
+ lockdep_is_held(&gact->tcf_lock));
if (nla_put(skb, TCA_GATE_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
--
2.52.GIT
Zero-initialize the tc_gate dump struct to avoid leaking padding bytes
to userspace. Without clearing the struct, uninitialized stack padding
can be copied into the netlink reply during action dumps.
Fixes: a51c328df310 ("net: qos: introduce a gate control flow action")
Cc: stable@vger.kernel.org
Signed-off-by: Paul Moses
---
net/sched/act_gate.c | 11 +++++------
1 file changed, 5 insertions(+), 6 deletions(-)
diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c
index 6934df233df5e..043ad856361d7 100644
--- a/net/sched/act_gate.c
+++ b/net/sched/act_gate.c
@@ -644,19 +644,18 @@ static int dumping_entry(struct sk_buff *skb,
static int tcf_gate_dump(struct sk_buff *skb, struct tc_action *a,
int bind, int ref)
{
- unsigned char *b = skb_tail_pointer(skb);
struct tcf_gate *gact = to_gate(a);
- struct tc_gate opt = {
- .index = gact->tcf_index,
- .refcnt = refcount_read(&gact->tcf_refcnt) - ref,
- .bindcnt = atomic_read(&gact->tcf_bindcnt) - bind,
- };
struct tcfg_gate_entry *entry;
struct tcf_gate_params *p;
struct nlattr *entry_list;
+ struct tc_gate opt = { };
struct tcf_t t;
+ unsigned char *b = skb_tail_pointer(skb);
spin_lock_bh(&gact->tcf_lock);
+ opt.index = gact->tcf_index;
+ opt.refcnt = refcount_read(&gact->tcf_refcnt) - ref;
+ opt.bindcnt = atomic_read(&gact->tcf_bindcnt) - bind;
opt.action = gact->tcf_action;
p = rcu_dereference_protected(gact->param,
--
2.52.GIT