The gate action can be replaced while the hrtimer callback or dump path is walking the schedule list. Convert the parameters to an RCU-protected snapshot and swap updates under tcf_lock, freeing the previous snapshot via call_rcu(). When REPLACE omits the entry list, preserve the existing schedule so the effective state is unchanged. Fixes: a51c328df310 ("net: qos: introduce a gate control flow action") Cc: stable@vger.kernel.org Signed-off-by: Paul Moses --- include/net/tc_act/tc_gate.h | 33 ++++- net/sched/act_gate.c | 275 ++++++++++++++++++++++++----------- 2 files changed, 219 insertions(+), 89 deletions(-) diff --git a/include/net/tc_act/tc_gate.h b/include/net/tc_act/tc_gate.h index c1a67149c6b62..5223c00279d5a 100644 --- a/include/net/tc_act/tc_gate.h +++ b/include/net/tc_act/tc_gate.h @@ -32,6 +32,7 @@ struct tcf_gate_params { s32 tcfg_clockid; size_t num_entries; struct list_head entries; + struct rcu_head rcu; }; #define GATE_ACT_GATE_OPEN BIT(0) @@ -39,7 +40,7 @@ struct tcf_gate_params { struct tcf_gate { struct tc_action common; - struct tcf_gate_params param; + struct tcf_gate_params __rcu *param; u8 current_gate_status; ktime_t current_close_time; u32 current_entry_octets; @@ -51,47 +52,65 @@ struct tcf_gate { #define to_gate(a) ((struct tcf_gate *)a) +static inline struct tcf_gate_params *tcf_gate_params_locked(const struct tc_action *a) +{ + struct tcf_gate *gact = to_gate(a); + + return rcu_dereference_protected(gact->param, + lockdep_is_held(&gact->tcf_lock)); +} + static inline s32 tcf_gate_prio(const struct tc_action *a) { + struct tcf_gate_params *p; s32 tcfg_prio; - tcfg_prio = to_gate(a)->param.tcfg_priority; + p = tcf_gate_params_locked(a); + tcfg_prio = p->tcfg_priority; return tcfg_prio; } static inline u64 tcf_gate_basetime(const struct tc_action *a) { + struct tcf_gate_params *p; u64 tcfg_basetime; - tcfg_basetime = to_gate(a)->param.tcfg_basetime; + p = tcf_gate_params_locked(a); + tcfg_basetime = p->tcfg_basetime; return tcfg_basetime; } static inline u64 tcf_gate_cycletime(const struct tc_action *a) { + struct tcf_gate_params *p; u64 tcfg_cycletime; - tcfg_cycletime = to_gate(a)->param.tcfg_cycletime; + p = tcf_gate_params_locked(a); + tcfg_cycletime = p->tcfg_cycletime; return tcfg_cycletime; } static inline u64 tcf_gate_cycletimeext(const struct tc_action *a) { + struct tcf_gate_params *p; u64 tcfg_cycletimeext; - tcfg_cycletimeext = to_gate(a)->param.tcfg_cycletime_ext; + p = tcf_gate_params_locked(a); + tcfg_cycletimeext = p->tcfg_cycletime_ext; return tcfg_cycletimeext; } static inline u32 tcf_gate_num_entries(const struct tc_action *a) { + struct tcf_gate_params *p; u32 num_entries; - num_entries = to_gate(a)->param.num_entries; + p = tcf_gate_params_locked(a); + num_entries = p->num_entries; return num_entries; } @@ -105,7 +124,7 @@ static inline struct action_gate_entry u32 num_entries; int i = 0; - p = &to_gate(a)->param; + p = tcf_gate_params_locked(a); num_entries = p->num_entries; list_for_each_entry(entry, &p->entries, list) diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c index c1f75f2727576..60c80e609ec3d 100644 --- a/net/sched/act_gate.c +++ b/net/sched/act_gate.c @@ -32,9 +32,12 @@ static ktime_t gate_get_time(struct tcf_gate *gact) return KTIME_MAX; } -static void gate_get_start_time(struct tcf_gate *gact, ktime_t *start) +static void tcf_gate_params_free_rcu(struct rcu_head *head); + +static void gate_get_start_time(struct tcf_gate *gact, + const struct tcf_gate_params *param, + ktime_t *start) { - struct tcf_gate_params *param = &gact->param; ktime_t now, base, cycle; u64 n; @@ -56,11 +59,10 @@ static void gate_start_timer(struct tcf_gate *gact, ktime_t start) { ktime_t expires; - expires = hrtimer_get_expires(&gact->hitimer); - if (expires == 0) - expires = KTIME_MAX; - - start = min_t(ktime_t, start, expires); + if (hrtimer_active(&gact->hitimer)) { + expires = hrtimer_get_expires(&gact->hitimer); + start = min_t(ktime_t, start, expires); + } hrtimer_start(&gact->hitimer, start, HRTIMER_MODE_ABS_SOFT); } @@ -69,12 +71,14 @@ static enum hrtimer_restart gate_timer_func(struct hrtimer *timer) { struct tcf_gate *gact = container_of(timer, struct tcf_gate, hitimer); - struct tcf_gate_params *p = &gact->param; struct tcfg_gate_entry *next; + struct tcf_gate_params *p; ktime_t close_time, now; spin_lock(&gact->tcf_lock); + p = rcu_dereference_protected(gact->param, + lockdep_is_held(&gact->tcf_lock)); next = gact->next_entry; /* cycle start, clear pending bit, clear total octets */ @@ -225,6 +229,35 @@ static void release_entry_list(struct list_head *entries) } } +static int tcf_gate_copy_entries(struct tcf_gate_params *dst, + const struct tcf_gate_params *src, + struct netlink_ext_ack *extack) +{ + struct tcfg_gate_entry *entry; + int i = 0; + + list_for_each_entry(entry, &src->entries, list) { + struct tcfg_gate_entry *new; + + new = kzalloc(sizeof(*new), GFP_ATOMIC); + if (!new) { + NL_SET_ERR_MSG(extack, "Not enough memory for entry"); + return -ENOMEM; + } + + new->index = entry->index; + new->gate_state = entry->gate_state; + new->interval = entry->interval; + new->ipv = entry->ipv; + new->maxoctets = entry->maxoctets; + list_add_tail(&new->list, &dst->entries); + i++; + } + + dst->num_entries = i; + return 0; +} + static int parse_gate_list(struct nlattr *list_attr, struct tcf_gate_params *sched, struct netlink_ext_ack *extack) @@ -261,7 +294,6 @@ static int parse_gate_list(struct nlattr *list_attr, } sched->num_entries = i; - return i; release_list: @@ -270,24 +302,44 @@ static int parse_gate_list(struct nlattr *list_attr, return err; } -static void gate_setup_timer(struct tcf_gate *gact, u64 basetime, - enum tk_offsets tko, s32 clockid, - bool do_init) +static bool gate_timer_needs_cancel(u64 basetime, u64 old_basetime, + enum tk_offsets tko, + enum tk_offsets old_tko, + s32 clockid, s32 old_clockid) { - if (!do_init) { - if (basetime == gact->param.tcfg_basetime && - tko == gact->tk_offset && - clockid == gact->param.tcfg_clockid) - return; + return basetime != old_basetime || + clockid != old_clockid || + tko != old_tko; +} - spin_unlock_bh(&gact->tcf_lock); - hrtimer_cancel(&gact->hitimer); - spin_lock_bh(&gact->tcf_lock); +static int gate_clock_resolve(s32 clockid, enum tk_offsets *tko, + struct netlink_ext_ack *extack) +{ + switch (clockid) { + case CLOCK_REALTIME: + *tko = TK_OFFS_REAL; + return 0; + case CLOCK_MONOTONIC: + *tko = TK_OFFS_MAX; + return 0; + case CLOCK_BOOTTIME: + *tko = TK_OFFS_BOOT; + return 0; + case CLOCK_TAI: + *tko = TK_OFFS_TAI; + return 0; + default: + NL_SET_ERR_MSG(extack, "Invalid 'clockid'"); + return -EINVAL; } - gact->param.tcfg_basetime = basetime; - gact->param.tcfg_clockid = clockid; - gact->tk_offset = tko; - hrtimer_setup(&gact->hitimer, gate_timer_func, clockid, HRTIMER_MODE_ABS_SOFT); +} + +static void gate_setup_timer(struct tcf_gate *gact, s32 clockid, + enum tk_offsets tko) +{ + WRITE_ONCE(gact->tk_offset, tko); + hrtimer_setup(&gact->hitimer, gate_timer_func, clockid, + HRTIMER_MODE_ABS_SOFT); } static int tcf_gate_init(struct net *net, struct nlattr *nla, @@ -296,15 +348,22 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, act_gate_ops.net_id); - enum tk_offsets tk_offset = TK_OFFS_TAI; + u64 cycletime = 0, basetime = 0, cycletime_ext = 0; + struct tcf_gate_params *p = NULL, *old_p = NULL; + enum tk_offsets old_tk_offset = TK_OFFS_TAI; + const struct tcf_gate_params *cur_p = NULL; bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_GATE_MAX + 1]; + enum tk_offsets tko = TK_OFFS_TAI; struct tcf_chain *goto_ch = NULL; - u64 cycletime = 0, basetime = 0; - struct tcf_gate_params *p; + s32 timer_clockid = CLOCK_TAI; + bool use_old_entries = false; + s32 old_clockid = CLOCK_TAI; + bool need_cancel = false; s32 clockid = CLOCK_TAI; struct tcf_gate *gact; struct tc_gate *parm; + u64 old_basetime = 0; int ret = 0, err; u32 gflags = 0; s32 prio = -1; @@ -321,26 +380,8 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, if (!tb[TCA_GATE_PARMS]) return -EINVAL; - if (tb[TCA_GATE_CLOCKID]) { + if (tb[TCA_GATE_CLOCKID]) clockid = nla_get_s32(tb[TCA_GATE_CLOCKID]); - switch (clockid) { - case CLOCK_REALTIME: - tk_offset = TK_OFFS_REAL; - break; - case CLOCK_MONOTONIC: - tk_offset = TK_OFFS_MAX; - break; - case CLOCK_BOOTTIME: - tk_offset = TK_OFFS_BOOT; - break; - case CLOCK_TAI: - tk_offset = TK_OFFS_TAI; - break; - default: - NL_SET_ERR_MSG(extack, "Invalid 'clockid'"); - return -EINVAL; - } - } parm = nla_data(tb[TCA_GATE_PARMS]); index = parm->index; @@ -366,6 +407,60 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, return -EEXIST; } + gact = to_gate(*a); + + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; + + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (!p) { + err = -ENOMEM; + goto release_idr; + } + INIT_LIST_HEAD(&p->entries); + + use_old_entries = !tb[TCA_GATE_ENTRY_LIST]; + if (!use_old_entries) { + err = parse_gate_list(tb[TCA_GATE_ENTRY_LIST], p, extack); + if (err < 0) + goto err_free; + use_old_entries = !err; + } + + if (ret == ACT_P_CREATED && use_old_entries) { + NL_SET_ERR_MSG(extack, "The entry list is empty"); + err = -EINVAL; + goto err_free; + } + + if (ret != ACT_P_CREATED) { + rcu_read_lock(); + cur_p = rcu_dereference(gact->param); + + old_basetime = cur_p->tcfg_basetime; + old_clockid = cur_p->tcfg_clockid; + old_tk_offset = READ_ONCE(gact->tk_offset); + + basetime = old_basetime; + cycletime_ext = cur_p->tcfg_cycletime_ext; + prio = cur_p->tcfg_priority; + gflags = cur_p->tcfg_flags; + + if (!tb[TCA_GATE_CLOCKID]) + clockid = old_clockid; + + err = 0; + if (use_old_entries) { + err = tcf_gate_copy_entries(p, cur_p, extack); + if (!err && !tb[TCA_GATE_CYCLE_TIME]) + cycletime = cur_p->tcfg_cycletime; + } + rcu_read_unlock(); + if (err) + goto err_free; + } + if (tb[TCA_GATE_PRIORITY]) prio = nla_get_s32(tb[TCA_GATE_PRIORITY]); @@ -375,25 +470,26 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, if (tb[TCA_GATE_FLAGS]) gflags = nla_get_u32(tb[TCA_GATE_FLAGS]); - gact = to_gate(*a); - if (ret == ACT_P_CREATED) - INIT_LIST_HEAD(&gact->param.entries); + if (tb[TCA_GATE_CYCLE_TIME]) + cycletime = nla_get_u64(tb[TCA_GATE_CYCLE_TIME]); - err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); - if (err < 0) - goto release_idr; + if (tb[TCA_GATE_CYCLE_TIME_EXT]) + cycletime_ext = nla_get_u64(tb[TCA_GATE_CYCLE_TIME_EXT]); - spin_lock_bh(&gact->tcf_lock); - p = &gact->param; + err = gate_clock_resolve(clockid, &tko, extack); + if (err) + goto err_free; + timer_clockid = clockid; - if (tb[TCA_GATE_CYCLE_TIME]) - cycletime = nla_get_u64(tb[TCA_GATE_CYCLE_TIME]); + need_cancel = ret != ACT_P_CREATED && + gate_timer_needs_cancel(basetime, old_basetime, + tko, old_tk_offset, + timer_clockid, old_clockid); - if (tb[TCA_GATE_ENTRY_LIST]) { - err = parse_gate_list(tb[TCA_GATE_ENTRY_LIST], p, extack); - if (err < 0) - goto chain_put; - } + if (need_cancel) + hrtimer_cancel(&gact->hitimer); + + spin_lock_bh(&gact->tcf_lock); if (!cycletime) { struct tcfg_gate_entry *entry; @@ -404,20 +500,22 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, cycletime = cycle; if (!cycletime) { err = -EINVAL; - goto chain_put; + goto unlock; } } p->tcfg_cycletime = cycletime; + p->tcfg_cycletime_ext = cycletime_ext; - if (tb[TCA_GATE_CYCLE_TIME_EXT]) - p->tcfg_cycletime_ext = - nla_get_u64(tb[TCA_GATE_CYCLE_TIME_EXT]); - - gate_setup_timer(gact, basetime, tk_offset, clockid, - ret == ACT_P_CREATED); + if (need_cancel || ret == ACT_P_CREATED) + gate_setup_timer(gact, timer_clockid, tko); p->tcfg_priority = prio; p->tcfg_flags = gflags; - gate_get_start_time(gact, &start); + p->tcfg_basetime = basetime; + p->tcfg_clockid = timer_clockid; + gate_get_start_time(gact, p, &start); + + old_p = rcu_replace_pointer(gact->param, p, + lockdep_is_held(&gact->tcf_lock)); gact->current_close_time = start; gact->current_gate_status = GATE_ACT_GATE_OPEN | GATE_ACT_PENDING; @@ -434,33 +532,47 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, if (goto_ch) tcf_chain_put_by_act(goto_ch); + if (old_p) + call_rcu(&old_p->rcu, tcf_gate_params_free_rcu); + return ret; -chain_put: +unlock: spin_unlock_bh(&gact->tcf_lock); +err_free: + release_entry_list(&p->entries); + kfree(p); +release_idr: if (goto_ch) tcf_chain_put_by_act(goto_ch); -release_idr: + /* action is not inserted in any list: it's safe to init hitimer * without taking tcf_lock. */ if (ret == ACT_P_CREATED) - gate_setup_timer(gact, gact->param.tcfg_basetime, - gact->tk_offset, gact->param.tcfg_clockid, - true); + gate_setup_timer(gact, timer_clockid, tko); tcf_idr_release(*a, bind); return err; } +static void tcf_gate_params_free_rcu(struct rcu_head *head) +{ + struct tcf_gate_params *p = container_of(head, struct tcf_gate_params, rcu); + + release_entry_list(&p->entries); + kfree(p); +} + static void tcf_gate_cleanup(struct tc_action *a) { struct tcf_gate *gact = to_gate(a); struct tcf_gate_params *p; - p = &gact->param; hrtimer_cancel(&gact->hitimer); - release_entry_list(&p->entries); + p = rcu_dereference_protected(gact->param, 1); + if (p) + call_rcu(&p->rcu, tcf_gate_params_free_rcu); } static int dumping_entry(struct sk_buff *skb, @@ -509,10 +621,9 @@ static int tcf_gate_dump(struct sk_buff *skb, struct tc_action *a, struct nlattr *entry_list; struct tcf_t t; - spin_lock_bh(&gact->tcf_lock); - opt.action = gact->tcf_action; - - p = &gact->param; + rcu_read_lock(); + opt.action = READ_ONCE(gact->tcf_action); + p = rcu_dereference(gact->param); if (nla_put(skb, TCA_GATE_PARMS, sizeof(opt), &opt)) goto nla_put_failure; @@ -552,12 +663,12 @@ static int tcf_gate_dump(struct sk_buff *skb, struct tc_action *a, tcf_tm_dump(&t, &gact->tcf_tm); if (nla_put_64bit(skb, TCA_GATE_TM, sizeof(t), &t, TCA_GATE_PAD)) goto nla_put_failure; - spin_unlock_bh(&gact->tcf_lock); + rcu_read_unlock(); return skb->len; nla_put_failure: - spin_unlock_bh(&gact->tcf_lock); + rcu_read_unlock(); nlmsg_trim(skb, b); return -1; } -- 2.53.0