Prepare mq_dump_common(), mqprio_dump() and mqprio_dump_class_stats()
for RTNL avoidance.

Use private variables instead of assuming sch->bstats and sch->qstats
can be used when folding stats from children. This means the child
qdisc spinlocks no longer need to be acquired.

Add a qdisc_qlen_lockless() helper, and change the
gnet_stats_add_basic() prototype.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 include/net/gen_stats.h   |  9 ++++--
 include/net/sch_generic.h |  5 ++++
 net/core/gen_estimator.c  | 24 +++++++--------
 net/core/gen_stats.c      | 22 ++++++--------
 net/sched/sch_mq.c        | 28 ++++++++++-------
 net/sched/sch_mqprio.c    | 63 +++++++++++++++++----------------------
 6 files changed, 76 insertions(+), 75 deletions(-)
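[ Reviewer note, not part of the changelog; git-am ignores text placed
  between the diffstat and the first diff. The sketch below restates the
  folding pattern that mq_dump_common() adopts in this patch, so the
  intended calling convention is visible in one place. foo_dump_stats()
  and its qdisc are hypothetical; every helper it calls --
  gnet_stats_add_basic(), gnet_stats_add_queue(), qdisc_qlen_lockless(),
  u64_stats_set(), WRITE_ONCE() -- comes straight from the diff below. ]

/* Hypothetical qdisc dump helper, for illustration only. */
static void foo_dump_stats(struct Qdisc *sch)
{
	struct net_device *dev = qdisc_dev(sch);
	struct gnet_stats_queue qstats = { 0 };	/* private accumulators, */
	struct gnet_stats bstats = { 0 };	/* not sch->qstats/sch->bstats */
	unsigned int qlen = 0;
	unsigned int ntx;

	for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
		const struct Qdisc *qdisc =
			rtnl_dereference(netdev_get_tx_queue(dev, ntx)->qdisc_sleeping);

		/* No spin_lock_bh(qdisc_lock(qdisc)) around these reads:
		 * the sources are percpu or u64_stats_sync protected, and
		 * the destination is private to this function.
		 */
		gnet_stats_add_basic(&bstats, qdisc->cpu_bstats,
				     &qdisc->bstats, false);
		gnet_stats_add_queue(&qstats, qdisc->cpu_qstats,
				     &qdisc->qstats);
		qlen += qdisc_qlen_lockless(qdisc);	/* READ_ONCE() based */
	}

	/* Publish the folded totals once, with annotated writes so that
	 * lockless readers see sane values (likewise for qstats.qlen,
	 * drops, requeues and overlimits).
	 */
	u64_stats_set(&sch->bstats.bytes, bstats.bytes);
	u64_stats_set(&sch->bstats.packets, bstats.packets);
	WRITE_ONCE(sch->qstats.backlog, qstats.backlog);
	WRITE_ONCE(sch->q.qlen, qlen);
}

diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h
index 7aa2b8e1fb298c4f994a745b114fc4da785ddf4b..5484b67298e3fe94fe84f0e929799362d21499df 100644
--- a/include/net/gen_stats.h
+++ b/include/net/gen_stats.h
@@ -21,6 +21,11 @@ struct gnet_stats_basic_sync {
 	struct u64_stats_sync syncp;
 } __aligned(2 * sizeof(u64));
 
+struct gnet_stats {
+	u64 bytes;
+	u64 packets;
+};
+
 struct net_rate_estimator;
 
 struct gnet_dump {
@@ -49,9 +54,9 @@ int gnet_stats_start_copy_compat(struct sk_buff *skb, int type,
 int gnet_stats_copy_basic(struct gnet_dump *d,
 			  struct gnet_stats_basic_sync __percpu *cpu,
 			  struct gnet_stats_basic_sync *b, bool running);
-void gnet_stats_add_basic(struct gnet_stats_basic_sync *bstats,
+void gnet_stats_add_basic(struct gnet_stats *bstats,
 			  struct gnet_stats_basic_sync __percpu *cpu,
-			  struct gnet_stats_basic_sync *b, bool running);
+			  const struct gnet_stats_basic_sync *b, bool running);
 int gnet_stats_copy_basic_hw(struct gnet_dump *d,
 			     struct gnet_stats_basic_sync __percpu *cpu,
 			     struct gnet_stats_basic_sync *b, bool running);
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 8e4e12692a1048f5e626b89c4db9e0339b16265d..f85aca5c1a445debd52a4e7b359141cf90cb7b55 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -542,6 +542,11 @@ static inline int qdisc_qlen(const struct Qdisc *q)
 	return q->q.qlen;
 }
 
+static inline int qdisc_qlen_lockless(const struct Qdisc *q)
+{
+	return READ_ONCE(q->q.qlen);
+}
+
 static inline void qdisc_qlen_inc(struct Qdisc *q)
 {
 	WRITE_ONCE(q->q.qlen, q->q.qlen + 1);
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index c34e58c6c3e666743e72978f9a78cf7f95a360c3..40990aee45590f2c56c070b0d28f856fc82d1f55 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -60,9 +60,10 @@ struct net_rate_estimator {
 };
 
 static void est_fetch_counters(struct net_rate_estimator *e,
-			       struct gnet_stats_basic_sync *b)
+			       struct gnet_stats *b)
 {
-	gnet_stats_basic_sync_init(b);
+	b->packets = 0;
+	b->bytes = 0;
 	if (e->stats_lock)
 		spin_lock(e->stats_lock);
 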
@@ -76,18 +77,15 @@ static void est_fetch_counters(struct net_rate_estimator *e,
 static void est_timer(struct timer_list *t)
 {
 	struct net_rate_estimator *est = timer_container_of(est, t, timer);
-	struct gnet_stats_basic_sync b;
-	u64 b_bytes, b_packets;
+	struct gnet_stats b;
 	u64 rate, brate;
 
 	est_fetch_counters(est, &b);
-	b_bytes = u64_stats_read(&b.bytes);
-	b_packets = u64_stats_read(&b.packets);
 
-	brate = (b_bytes - est->last_bytes) << (10 - est->intvl_log);
+	brate = (b.bytes - est->last_bytes) << (10 - est->intvl_log);
 	brate = (brate >> est->ewma_log) - (est->avbps >> est->ewma_log);
 
-	rate = (b_packets - est->last_packets) << (10 - est->intvl_log);
+	rate = (b.packets - est->last_packets) << (10 - est->intvl_log);
 	rate = (rate >> est->ewma_log) - (est->avpps >> est->ewma_log);
 
 	preempt_disable_nested();
@@ -97,8 +95,8 @@ static void est_timer(struct timer_list *t)
 	write_seqcount_end(&est->seq);
 	preempt_enable_nested();
 
-	est->last_bytes = b_bytes;
-	est->last_packets = b_packets;
+	est->last_bytes = b.bytes;
+	est->last_packets = b.packets;
 
 	est->next_jiffies += ((HZ/4) << est->intvl_log);
 
@@ -138,7 +136,7 @@ int gen_new_estimator(struct gnet_stats_basic_sync *bstats,
 {
 	struct gnet_estimator *parm = nla_data(opt);
 	struct net_rate_estimator *old, *est;
-	struct gnet_stats_basic_sync b;
+	struct gnet_stats b;
 	int intvl_log;
 
 	if (nla_len(opt) < sizeof(*parm))
@@ -172,8 +170,8 @@ int gen_new_estimator(struct gnet_stats_basic_sync *bstats,
 	est_fetch_counters(est, &b);
 	if (lock)
 		local_bh_enable();
-	est->last_bytes = u64_stats_read(&b.bytes);
-	est->last_packets = u64_stats_read(&b.packets);
+	est->last_bytes = b.bytes;
+	est->last_packets = b.packets;
 
 	if (lock)
 		spin_lock_bh(lock);
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index 1a2380e74272de8eaf3d4ef453e56105a31e9edf..29a178bebcc13d199e71ef0b09d852bd9dbf3378 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -123,12 +123,13 @@ void gnet_stats_basic_sync_init(struct gnet_stats_basic_sync *b)
 }
 EXPORT_SYMBOL(gnet_stats_basic_sync_init);
 
-static void gnet_stats_add_basic_cpu(struct gnet_stats_basic_sync *bstats,
+static void gnet_stats_add_basic_cpu(struct gnet_stats *bstats,
 				     struct gnet_stats_basic_sync __percpu *cpu)
 {
-	u64 t_bytes = 0, t_packets = 0;
 	int i;
 
+	bstats->bytes = 0;
+	bstats->packets = 0;
 	for_each_possible_cpu(i) {
 		struct gnet_stats_basic_sync *bcpu = per_cpu_ptr(cpu, i);
 		unsigned int start;
@@ -140,19 +141,16 @@ static void gnet_stats_add_basic_cpu(struct gnet_stats_basic_sync *bstats,
 			packets = u64_stats_read(&bcpu->packets);
 		} while (u64_stats_fetch_retry(&bcpu->syncp, start));
 
-		t_bytes += bytes;
-		t_packets += packets;
+		bstats->bytes += bytes;
+		bstats->packets += packets;
 	}
-	_bstats_update(bstats, t_bytes, t_packets);
 }
 
-void gnet_stats_add_basic(struct gnet_stats_basic_sync *bstats,
+void gnet_stats_add_basic(struct gnet_stats *bstats,
 			  struct gnet_stats_basic_sync __percpu *cpu,
-			  struct gnet_stats_basic_sync *b, bool running)
+			  const struct gnet_stats_basic_sync *b, bool running)
 {
 	unsigned int start;
-	u64 bytes = 0;
-	u64 packets = 0;
 
 	WARN_ON_ONCE((cpu || running) && in_hardirq());
 
@@ -163,11 +161,9 @@ void gnet_stats_add_basic(struct gnet_stats_basic_sync *bstats,
 	do {
 		if (running)
 			start = u64_stats_fetch_begin(&b->syncp);
-		bytes = u64_stats_read(&b->bytes);
-		packets = u64_stats_read(&b->packets);
+		bstats->bytes = u64_stats_read(&b->bytes);
+		bstats->packets = u64_stats_read(&b->packets);
 	} while (running && u64_stats_fetch_retry(&b->syncp, start));
-
-	_bstats_update(bstats, bytes, packets);
 }
 EXPORT_SYMBOL(gnet_stats_add_basic);
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index a0133a7b9d3b09a0d2a6064234c8fdef60dbf955..1e4522436620e5ee3a0417996476b37d9abca299 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -143,13 +143,12 @@ EXPORT_SYMBOL_NS_GPL(mq_attach, "NET_SCHED_INTERNAL");
 void mq_dump_common(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct net_device *dev = qdisc_dev(sch);
-	struct Qdisc *qdisc;
+	struct gnet_stats_queue qstats = { 0 };
+	struct gnet_stats bstats = { 0 };
+	const struct Qdisc *qdisc;
+	unsigned int qlen = 0;
 	unsigned int ntx;
 
-	sch->q.qlen = 0;
-	gnet_stats_basic_sync_init(&sch->bstats);
-	memset(&sch->qstats, 0, sizeof(sch->qstats));
-
 	/* MQ supports lockless qdiscs. However, statistics accounting needs
 	 * to account for all, none, or a mix of locked and unlocked child
 	 * qdiscs. Percpu stats are added to counters in-band and locking
@@ -157,16 +156,23 @@ void mq_dump_common(struct Qdisc *sch, struct sk_buff *skb)
 	 */
 	for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
 		qdisc = rtnl_dereference(netdev_get_tx_queue(dev, ntx)->qdisc_sleeping);
-		spin_lock_bh(qdisc_lock(qdisc));
-
-		gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats,
+		gnet_stats_add_basic(&bstats, qdisc->cpu_bstats,
 				     &qdisc->bstats, false);
-		gnet_stats_add_queue(&sch->qstats, qdisc->cpu_qstats,
+		gnet_stats_add_queue(&qstats, qdisc->cpu_qstats,
 				     &qdisc->qstats);
-		sch->q.qlen += qdisc_qlen(qdisc);
-
-		spin_unlock_bh(qdisc_lock(qdisc));
+		qlen += qdisc_qlen_lockless(qdisc);
 	}
+
+	u64_stats_set(&sch->bstats.bytes, bstats.bytes);
+	u64_stats_set(&sch->bstats.packets, bstats.packets);
+
+	WRITE_ONCE(sch->qstats.qlen, qstats.qlen);
+	WRITE_ONCE(sch->qstats.backlog, qstats.backlog);
+	WRITE_ONCE(sch->qstats.drops, qstats.drops);
+	WRITE_ONCE(sch->qstats.requeues, qstats.requeues);
+	WRITE_ONCE(sch->qstats.overlimits, qstats.overlimits);
+
+	WRITE_ONCE(sch->q.qlen, qlen);
 }
 EXPORT_SYMBOL_NS_GPL(mq_dump_common, "NET_SCHED_INTERNAL");
 
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index d35624e5869a4a6a12612886b2cd9cdac7b0b471..ac6d7d0a35309a5375425790f89f587601511cc7 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -554,15 +554,13 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
 	struct net_device *dev = qdisc_dev(sch);
 	struct mqprio_sched *priv = qdisc_priv(sch);
 	struct nlattr *nla = (struct nlattr *)skb_tail_pointer(skb);
+	struct gnet_stats_queue qstats = { 0 };
 	struct tc_mqprio_qopt opt = { 0 };
+	struct gnet_stats bstats = { 0 };
+	const struct Qdisc *qdisc;
 	unsigned int qlen = 0;
-	struct Qdisc *qdisc;
 	unsigned int ntx;
 
-	qlen = 0;
-	gnet_stats_basic_sync_init(&sch->bstats);
-	memset(&sch->qstats, 0, sizeof(sch->qstats));
-
 	/* MQ supports lockless qdiscs. However, statistics accounting needs
 	 * to account for all, none, or a mix of locked and unlocked child
 	 * qdiscs. Percpu stats are added to counters in-band and locking
@@ -570,16 +568,22 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
 	 */
 	for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
 		qdisc = rtnl_dereference(netdev_get_tx_queue(dev, ntx)->qdisc_sleeping);
-		spin_lock_bh(qdisc_lock(qdisc));
-
-		gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats,
+		gnet_stats_add_basic(&bstats, qdisc->cpu_bstats,
 				     &qdisc->bstats, false);
-		gnet_stats_add_queue(&sch->qstats, qdisc->cpu_qstats,
+		gnet_stats_add_queue(&qstats, qdisc->cpu_qstats,
 				     &qdisc->qstats);
 		qlen += qdisc_qlen(qdisc);
-
-		spin_unlock_bh(qdisc_lock(qdisc));
 	}
+
+	u64_stats_set(&sch->bstats.bytes, bstats.bytes);
+	u64_stats_set(&sch->bstats.packets, bstats.packets);
+
+	WRITE_ONCE(sch->qstats.qlen, qstats.qlen);
+	WRITE_ONCE(sch->qstats.backlog, qstats.backlog);
+	WRITE_ONCE(sch->qstats.drops, qstats.drops);
+	WRITE_ONCE(sch->qstats.requeues, qstats.requeues);
+	WRITE_ONCE(sch->qstats.overlimits, qstats.overlimits);
+	WRITE_ONCE(sch->q.qlen, qlen);
 
 	mqprio_qopt_reconstruct(dev, &opt);
 
@@ -661,45 +665,32 @@ static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl,
 
 static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 				   struct gnet_dump *d)
-	__releases(d->lock)
-	__acquires(d->lock)
 {
 	if (cl >= TC_H_MIN_PRIORITY) {
-		int i;
-		__u32 qlen;
-		struct gnet_stats_queue qstats = {0};
-		struct gnet_stats_basic_sync bstats;
 		struct net_device *dev = qdisc_dev(sch);
 		struct netdev_tc_txq tc = dev->tc_to_txq[cl & TC_BITMASK];
-
-		gnet_stats_basic_sync_init(&bstats);
-		/* Drop lock here it will be reclaimed before touching
-		 * statistics this is required because the d->lock we
-		 * hold here is the look on dev_queue->qdisc_sleeping
-		 * also acquired below.
-		 */
-		if (d->lock)
-			spin_unlock_bh(d->lock);
+		struct gnet_stats_queue qstats = { 0 };
+		struct gnet_stats_basic_sync bstats;
+		struct gnet_stats _bstats = { 0 };
+		u32 qlen = 0;
+		int i;
 
 		for (i = tc.offset; i < tc.offset + tc.count; i++) {
 			struct netdev_queue *q = netdev_get_tx_queue(dev, i);
-			struct Qdisc *qdisc = rtnl_dereference(q->qdisc);
+			const struct Qdisc *qdisc = rtnl_dereference(q->qdisc);
 
-			spin_lock_bh(qdisc_lock(qdisc));
-
-			gnet_stats_add_basic(&bstats, qdisc->cpu_bstats,
+			gnet_stats_add_basic(&_bstats, qdisc->cpu_bstats,
 					     &qdisc->bstats, false);
 			gnet_stats_add_queue(&qstats, qdisc->cpu_qstats,
 					     &qdisc->qstats);
-			sch->q.qlen += qdisc_qlen(qdisc);
-
-			spin_unlock_bh(qdisc_lock(qdisc));
+			qlen += qdisc_qlen_lockless(qdisc);
 		}
-		qlen = qdisc_qlen(sch) + qstats.qlen;
+		u64_stats_init(&bstats.syncp);
+		u64_stats_set(&bstats.bytes, _bstats.bytes);
+		u64_stats_set(&bstats.packets, _bstats.packets);
+
+		qlen = qlen + qstats.qlen;
 
-		/* Reclaim root sleeping lock before completing stats */
-		if (d->lock)
-			spin_lock_bh(d->lock);
 		if (gnet_stats_copy_basic(d, NULL, &bstats, false) < 0 ||
 		    gnet_stats_copy_queue(d, NULL, &qstats, qlen) < 0)
 			return -1;
-- 
2.53.0.1213.gd9a14994de-goog