FQ-PIE schedules independent PIE controllers per flow but exposes no
per-flow AQM state. Without class-level statistics there is no way to
observe the per-flow drop probability, queue delay, deficit or dequeue
rate from userspace.

Extend tc_fq_pie_xstats with a discriminated union, following the same
pattern established by FQ-CoDel (TCA_FQ_CODEL_XSTATS_CLASS):

  - Rename the existing flat struct to tc_fq_pie_qd_stats.
  - Add tc_fq_pie_cl_stats with per-flow prob, delay, deficit,
    avg_dq_rate, and dq_rate_estimating fields.
  - Wrap both in tc_fq_pie_xstats with a type discriminator
    (TCA_FQ_PIE_XSTATS_QDISC / TCA_FQ_PIE_XSTATS_CLASS).

Wire up fq_pie_class_ops (.walk, .dump, .dump_stats) so that
'tc -s class show' against an fq_pie qdisc reports per-flow state:

  prob                per-flow PIE drop probability
  delay               per-flow queue sojourn time (microseconds)
  deficit             remaining DRR byte credits (signed integer)
  avg_dq_rate         dequeue rate estimate in bytes/second
                      (dq_rate_estimator mode only)
  dq_rate_estimating  flag indicating that the dequeue rate estimator
                      (dq_rate_estimator mode) is enabled

Note: this changes the on-wire layout of tc_fq_pie_xstats. The type
field now occupies the first four bytes, displacing the former
packets_in field. Userspace must be updated concurrently; the
companion iproute2 patch handles this.

Fix the 'delay' field comment in struct tc_pie_xstats from "in ms" to
"in microseconds" to match the kernel's PSCHED_TICKS2NS /
NSEC_PER_USEC conversion. Also correct the avg_dq_rate comment in
tc_pie_xstats from "bits/pie_time" to "bytes/second" to match the
actual kernel conversion
(avg_dq_rate * PSCHED_TICKS_PER_SEC >> PIE_SCALE).

Signed-off-by: Hemendra M. Naik
Signed-off-by: Vishal Kamath
Signed-off-by: Mohit P.
Tahiliani --- include/uapi/linux/pkt_sched.h | 29 +++++- net/sched/sch_fq_pie.c | 131 ++++++++++++++++++++++++--- tools/include/uapi/linux/pkt_sched.h | 4 +- 3 files changed, 146 insertions(+), 18 deletions(-) diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 66e8072f44df..0ad895edcb07 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -910,9 +910,9 @@ enum { struct tc_pie_xstats { __u64 prob; /* current probability */ - __u32 delay; /* current delay in ms */ + __u32 delay; /* current delay in microseconds */ __u32 avg_dq_rate; /* current average dq_rate in - * bits/pie_time + * bytes/second */ __u32 dq_rate_estimating; /* is avg_dq_rate being calculated? */ __u32 packets_in; /* total number of packets enqueued */ @@ -943,7 +943,12 @@ enum { }; #define TCA_FQ_PIE_MAX (__TCA_FQ_PIE_MAX - 1) -struct tc_fq_pie_xstats { +enum { + TCA_FQ_PIE_XSTATS_QDISC, + TCA_FQ_PIE_XSTATS_CLASS, +}; + +struct tc_fq_pie_qd_stats { __u32 packets_in; /* total number of packets enqueued */ __u32 dropped; /* packets dropped due to fq_pie_action */ __u32 overlimit; /* dropped due to lack of space in queue */ @@ -955,6 +960,24 @@ struct tc_fq_pie_xstats { __u32 memory_usage; /* total memory across all queues */ }; +struct tc_fq_pie_cl_stats { + __u64 prob; /* current probability */ + __u32 delay; /* current delay in microseconds */ + __s32 deficit; /* number of remaining byte credits */ + __u32 avg_dq_rate; /* current average dq_rate in + * bytes/second + */ + __u32 dq_rate_estimating; /* is avg_dq_rate being calculated? 
*/ +}; + +struct tc_fq_pie_xstats { + __u32 type; + union { + struct tc_fq_pie_qd_stats qdisc_stats; + struct tc_fq_pie_cl_stats class_stats; + }; +}; + /* CBS */ struct tc_cbs_qopt { __u8 offload; diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c index 7becbf5362b3..d22c4e02d2d9 100644 --- a/net/sched/sch_fq_pie.c +++ b/net/sched/sch_fq_pie.c @@ -330,7 +330,7 @@ static int fq_pie_change(struct Qdisc *sch, struct nlattr *opt, /* tupdate is in jiffies */ if (tb[TCA_FQ_PIE_TUPDATE]) WRITE_ONCE(q->p_params.tupdate, - usecs_to_jiffies(nla_get_u32(tb[TCA_FQ_PIE_TUPDATE]))); + usecs_to_jiffies(nla_get_u32(tb[TCA_FQ_PIE_TUPDATE]))); if (tb[TCA_FQ_PIE_ALPHA]) WRITE_ONCE(q->p_params.alpha, @@ -509,24 +509,25 @@ static int fq_pie_dump(struct Qdisc *sch, struct sk_buff *skb) static int fq_pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d) { struct fq_pie_sched_data *q = qdisc_priv(sch); - struct tc_fq_pie_xstats st = { 0 }; + struct tc_fq_pie_xstats st = { + .type = TCA_FQ_PIE_XSTATS_QDISC, + }; struct list_head *pos; - sch_tree_lock(sch); - - st.packets_in = q->stats.packets_in; - st.overlimit = q->stats.overlimit; - st.overmemory = q->overmemory; - st.dropped = q->stats.dropped; - st.ecn_mark = q->stats.ecn_mark; - st.new_flow_count = q->new_flow_count; - st.memory_usage = q->memory_usage; + st.qdisc_stats.packets_in = q->stats.packets_in; + st.qdisc_stats.overlimit = q->stats.overlimit; + st.qdisc_stats.overmemory = q->overmemory; + st.qdisc_stats.dropped = q->stats.dropped; + st.qdisc_stats.ecn_mark = q->stats.ecn_mark; + st.qdisc_stats.new_flow_count = q->new_flow_count; + st.qdisc_stats.memory_usage = q->memory_usage; + sch_tree_lock(sch); list_for_each(pos, &q->new_flows) - st.new_flows_len++; + st.qdisc_stats.new_flows_len++; list_for_each(pos, &q->old_flows) - st.old_flows_len++; + st.qdisc_stats.old_flows_len++; sch_tree_unlock(sch); return gnet_stats_copy_app(d, &st, sizeof(st)); @@ -561,7 +562,111 @@ static void fq_pie_destroy(struct Qdisc *sch) 
kvfree(q->flows); } +static struct Qdisc *fq_pie_leaf(struct Qdisc *sch, unsigned long arg) +{ + return NULL; +} + +static unsigned long fq_pie_find(struct Qdisc *sch, u32 classid) +{ + return 0; +} + +static unsigned long fq_pie_bind(struct Qdisc *sch, unsigned long parent, + u32 classid) +{ + return 0; +} + +static void fq_pie_unbind(struct Qdisc *q, unsigned long cl) +{ +} + +static struct tcf_block *fq_pie_tcf_block(struct Qdisc *sch, unsigned long cl, + struct netlink_ext_ack *extack) +{ + struct fq_pie_sched_data *q = qdisc_priv(sch); + + if (cl) + return NULL; + return q->block; +} + +static int fq_pie_dump_class(struct Qdisc *sch, unsigned long cl, + struct sk_buff *skb, struct tcmsg *tcm) +{ + tcm->tcm_handle |= TC_H_MIN(cl); + return 0; +} + +static int fq_pie_dump_class_stats(struct Qdisc *sch, unsigned long cl, + struct gnet_dump *d) +{ + struct fq_pie_sched_data *q = qdisc_priv(sch); + struct gnet_stats_queue qs = { 0 }; + struct tc_fq_pie_xstats xstats; + u32 idx = cl - 1; + + if (idx < q->flows_cnt) { + const struct fq_pie_flow *flow = &q->flows[idx]; + + memset(&xstats, 0, sizeof(xstats)); + xstats.type = TCA_FQ_PIE_XSTATS_CLASS; + xstats.class_stats.prob = READ_ONCE(flow->vars.prob) << BITS_PER_BYTE; + xstats.class_stats.delay = + ((u32)PSCHED_TICKS2NS(READ_ONCE(flow->vars.qdelay))) / + NSEC_PER_USEC; + xstats.class_stats.deficit = READ_ONCE(flow->deficit); + xstats.class_stats.dq_rate_estimating = + READ_ONCE(q->p_params.dq_rate_estimator); + + if (xstats.class_stats.dq_rate_estimating) { + xstats.class_stats.avg_dq_rate = + READ_ONCE(flow->vars.avg_dq_rate) * + (PSCHED_TICKS_PER_SEC) >> PIE_SCALE; + } + + qs.qlen = READ_ONCE(flow->qlen); + qs.backlog = READ_ONCE(flow->backlog); + } + if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0) + return -1; + if (idx < q->flows_cnt) + return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); + return 0; +} + +static void fq_pie_walk(struct Qdisc *sch, struct qdisc_walker *arg) +{ + struct fq_pie_sched_data 
*q = qdisc_priv(sch); + unsigned int i; + + if (arg->stop) + return; + + for (i = 0; i < q->flows_cnt; i++) { + if (list_empty(&q->flows[i].flowchain)) { + arg->count++; + continue; + } + if (!tc_qdisc_stats_dump(sch, i + 1, arg)) + break; + } +} + +static const struct Qdisc_class_ops fq_pie_class_ops = { + .leaf = fq_pie_leaf, + .find = fq_pie_find, + .tcf_block = fq_pie_tcf_block, + .bind_tcf = fq_pie_bind, + .unbind_tcf = fq_pie_unbind, + .dump = fq_pie_dump_class, + .dump_stats = fq_pie_dump_class_stats, + .walk = fq_pie_walk, +}; + static struct Qdisc_ops fq_pie_qdisc_ops __read_mostly = { + .cl_ops = &fq_pie_class_ops, .id = "fq_pie", .priv_size = sizeof(struct fq_pie_sched_data), .enqueue = fq_pie_qdisc_enqueue, diff --git a/tools/include/uapi/linux/pkt_sched.h b/tools/include/uapi/linux/pkt_sched.h index 587481a19433..9bc190f0b3cf 100644 --- a/tools/include/uapi/linux/pkt_sched.h +++ b/tools/include/uapi/linux/pkt_sched.h @@ -847,8 +847,8 @@ enum { struct tc_pie_xstats { __u32 prob; /* current probability */ - __u32 delay; /* current delay in ms */ - __u32 avg_dq_rate; /* current average dq_rate in bits/pie_time */ + __u32 delay; /* current delay in microseconds */ + __u32 avg_dq_rate; /* current average dq_rate in bytes/second */ __u32 packets_in; /* total number of packets enqueued */ __u32 dropped; /* packets dropped due to pie_action */ __u32 overlimit; /* dropped due to lack of space in queue */ -- 2.34.1