From: Willem de Bruijn When inserting an EDT packet with time before flow->time_next_packet, update the flow's and possibly the queue's next delivery time. Reinsert the flow into the q->delayed rb-tree to position it correctly and to have fq_check_throttled set the wake-up at the right next time. Factor RB tree insertion out of fq_flow_set_throttled to avoid open coding it twice. EDT packets do not take precedence over the queue rate limit. Skip this new step if a queue limit is set. EDT packets do take precedence over per-socket rate limits, as can be seen from fq_dequeue reading sk_pacing_rate if !skb->tstamp. With this change the so_txtime selftest sends packets in the expected order. Fixes: eeb84aa0d0af ("net_sched: sch_fq: do not assume EDT packets are ordered") Assisted-by: Gemini:gemini-3 Signed-off-by: Willem de Bruijn --- On net this has a conflict on so_txtime.py. Treat this as an improvement and send to net-next only (not stable material). --- net/sched/sch_fq.c | 34 ++++++++++++++++--- .../selftests/drivers/net/so_txtime.py | 2 +- 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 796cb8046a90..33783c9f8e16 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -217,7 +217,7 @@ static void fq_flow_unset_throttled(struct fq_sched_data *q, struct fq_flow *f) fq_flow_add_tail(q, f, OLD_FLOW); } -static void fq_flow_set_throttled(struct fq_sched_data *q, struct fq_flow *f) +static void fq_flow_rb_insert(struct fq_sched_data *q, struct fq_flow *f) { struct rb_node **p = &q->delayed.rb_node, *parent = NULL; @@ -233,14 +233,18 @@ static void fq_flow_set_throttled(struct fq_sched_data *q, struct fq_flow *f) } rb_link_node(&f->rate_node, parent, p); rb_insert_color(&f->rate_node, &q->delayed); - q->throttled_flows++; - q->stat_throttled++; - f->next = &throttled; if (q->time_next_delayed_flow > f->time_next_packet) q->time_next_delayed_flow = f->time_next_packet; } +static void fq_flow_set_throttled(struct fq_sched_data *q, 
struct fq_flow *f) +{ + fq_flow_rb_insert(q, f); + q->throttled_flows++; + q->stat_throttled++; + f->next = &throttled; +} static struct kmem_cache *fq_flow_cachep __read_mostly; @@ -539,6 +543,24 @@ static bool fq_packet_beyond_horizon(const struct sk_buff *skb, return unlikely((s64)skb->tstamp > (s64)(now + q->horizon)); } +static void fq_flow_adjust_timer(struct fq_sched_data *q, struct fq_flow *flow, + u64 time_to_send, u64 now) +{ + if (time_to_send <= now) { + fq_flow_unset_throttled(q, flow); + if (q->time_next_delayed_flow == flow->time_next_packet) { + struct rb_node *p = rb_first(&q->delayed); + + q->time_next_delayed_flow = p ? rb_entry(p, struct fq_flow, rate_node)->time_next_packet : ~0ULL; + } + flow->time_next_packet = time_to_send; + } else { + rb_erase(&flow->rate_node, &q->delayed); + flow->time_next_packet = time_to_send; + fq_flow_rb_insert(q, flow); + } +} + static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { @@ -596,6 +618,10 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, /* Note: this overwrites f->age */ flow_queue_add(f, skb); + if (fq_skb_cb(skb)->time_to_send < f->time_next_packet && skb->tstamp && + fq_flow_is_throttled(f) && q->flow_max_rate == ~0UL) + fq_flow_adjust_timer(q, f, fq_skb_cb(skb)->time_to_send, now); + qdisc_qstats_backlog_inc(sch, skb); qdisc_qlen_inc(sch); diff --git a/tools/testing/selftests/drivers/net/so_txtime.py b/tools/testing/selftests/drivers/net/so_txtime.py index e7de8fe22c1e..5d4388bfc6dd 100755 --- a/tools/testing/selftests/drivers/net/so_txtime.py +++ b/tools/testing/selftests/drivers/net/so_txtime.py @@ -53,7 +53,7 @@ def _test_variants_mono(): ["zero_delay", "a,0", "a,0"], ["one_pkt", "a,10", "a,10"], ["in_order", "a,10,b,20", "a,10,b,20"], - ["reverse_order", "a,20,b,10", "b,20,a,20"], + ["reverse_order", "a,20,b,10", "b,10,a,20"], ]: name = f"v{ipver}_{testcase[0]}" yield KsftNamedVariant(name, ipver, testcase[1], testcase[2]) -- 
2.54.0.746.g67dd491aae-goog