GEM has registers to configure the Tx SRAM segments distribution across queues. The reset value is appropriate (even spread) but we need to care if/when the number of active queues is modified (or if we inherited unevenly initialised hardware from the bootloader). To distribute segments, we take as input the number of queues (bp->num_queues) and the number of segments (found inside DCFG6). The output is a number of segments for each queue, formatted as powers-of-two (e.g. 2 for queue 0 means it has 2^2=4 segments). As the distribution logic is quite complex (at least its initial versions had bugs), it is kunit-tested in macb_kunit.c and the implementation lives in macb_utils.c. To test: ⟩ env --unset=CROSS_COMPILE make ARCH=um mrproper ⟩ env --unset=CROSS_COMPILE ./tools/testing/kunit/kunit.py run \ --kconfig_add CONFIG_NET=y \ --kconfig_add CONFIG_COMMON_CLK=y macb Signed-off-by: Théo Lebrun --- drivers/net/ethernet/cadence/Kconfig | 12 +++++ drivers/net/ethernet/cadence/Makefile | 5 +- drivers/net/ethernet/cadence/macb.h | 8 ++++ drivers/net/ethernet/cadence/macb_kunit.c | 77 +++++++++++++++++++++++++++++++ drivers/net/ethernet/cadence/macb_main.c | 15 ++++++ drivers/net/ethernet/cadence/macb_utils.c | 56 ++++++++++++++++++++++ 6 files changed, 172 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/Kconfig b/drivers/net/ethernet/cadence/Kconfig index 5b2a461dfd28..a901f74fd4ab 100644 --- a/drivers/net/ethernet/cadence/Kconfig +++ b/drivers/net/ethernet/cadence/Kconfig @@ -51,4 +51,16 @@ config MACB_PCI To compile this driver as a module, choose M here: the module will be called macb_pci. +config MACB_KUNIT_TEST + tristate "KUnit test for MACB" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS + help + Build KUnit tests for the MACB driver. + + For more information on KUnit and unit tests in general, + please refer to the KUnit documentation. + + If unsure, say N. 
+ endif # NET_VENDOR_CADENCE diff --git a/drivers/net/ethernet/cadence/Makefile b/drivers/net/ethernet/cadence/Makefile index 1f33cdca9a3c..a0740da68649 100644 --- a/drivers/net/ethernet/cadence/Makefile +++ b/drivers/net/ethernet/cadence/Makefile @@ -2,7 +2,7 @@ # # Makefile for the Atmel network device drivers. # -macb-y := macb_main.o +macb-y := macb_main.o macb_utils.o ifeq ($(CONFIG_MACB_USE_HWSTAMP),y) macb-y += macb_ptp.o @@ -10,3 +10,6 @@ endif obj-$(CONFIG_MACB) += macb.o obj-$(CONFIG_MACB_PCI) += macb_pci.o + +obj-$(CONFIG_MACB_KUNIT_TEST) += macb_test.o +macb_test-y := macb_kunit.o macb_utils.o diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index b08afe340996..0464e774273a 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -197,6 +197,9 @@ #define GEM_TXBDCTRL 0x04cc /* TX Buffer Descriptor control register */ #define GEM_RXBDCTRL 0x04d0 /* RX Buffer Descriptor control register */ +#define GEM_TXQSEGALLOC_LOWER 0x05A0 /* Tx queue segment allocation (low) */ +#define GEM_TXQSEGALLOC_UPPER 0x05A4 /* Tx queue segment allocation (high) */ + /* Screener Type 2 match registers */ #define GEM_SCRT2 0x540 @@ -549,6 +552,8 @@ #define GEM_PBUF_CUTTHRU_SIZE 1 #define GEM_DAW64_OFFSET 23 #define GEM_DAW64_SIZE 1 +#define GEM_SEGMENTS_BIT_SIZE_OFFSET 16 +#define GEM_SEGMENTS_BIT_SIZE_SIZE 3 /* Bitfields in DCFG8. 
*/
 #define GEM_T1SCR_OFFSET	24
@@ -1494,4 +1499,7 @@ struct macb_queue_enst_config {
 	u8 queue_id;
 };
 
+u64 gem_sram_distribute_segments(unsigned int num_queues,
+				 unsigned int num_segments);
+
 #endif /* _MACB_H */
diff --git a/drivers/net/ethernet/cadence/macb_kunit.c b/drivers/net/ethernet/cadence/macb_kunit.c
new file mode 100644
index 000000000000..b91a10bda623
--- /dev/null
+++ b/drivers/net/ethernet/cadence/macb_kunit.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include
+#include
+#include "macb.h"
+
+/* One parametrised case: the inputs of gem_sram_distribute_segments(). */
+struct macb_sram_segments_case {
+	unsigned int num_queues, num_segments;
+};
+
+/*
+ * Check the TXQSEGALLOC value computed for one case: each queue that can be
+ * served holds a power-of-two share, the fields of the other queues (up to 16)
+ * are zero, and the shares add up to min(num_segments, 8 * num_queues).
+ */
+static void macb_sram_segments_test(struct kunit *test)
+{
+	const struct macb_sram_segments_case *p = test->param_value;
+	u64 val = gem_sram_distribute_segments(p->num_queues, p->num_segments);
+	unsigned int i, sum_segments = 0, max_assigned_segments;
+	unsigned int num_queues = min(p->num_queues, p->num_segments);
+
+	for (i = 0; i < num_queues; i++) {
+		unsigned int q_segments = (val >> (i * 4)) & 0b11;
+
+		q_segments = 1U << q_segments;
+		sum_segments += q_segments;
+		KUNIT_ASSERT_GT_MSG(test, q_segments, 0, "queue %u, val %#llx",
+				    i, val);
+	}
+
+	for (i = num_queues; i < 16; i++) {
+		unsigned int pow = (val >> (i * 4)) & 0b11;
+
+		KUNIT_ASSERT_EQ_MSG(test, pow, 0, "queue %u, val %#llx",
+				    i, val);
+	}
+
+	max_assigned_segments = min(p->num_segments, 8 * p->num_queues);
+	KUNIT_ASSERT_EQ_MSG(test, sum_segments, max_assigned_segments,
+			    "val %#llx", val);
+}
+
+static const struct macb_sram_segments_case macb_sram_segments_cases[] = {
+	/* num_segments can only be powers of two. */
+	{ .num_queues = 4, .num_segments = 2 },
+	{ .num_queues = 1, .num_segments = 16 },
+	{ .num_queues = 4, .num_segments = 16 },
+	{ .num_queues = 5, .num_segments = 16 },
+	{ .num_queues = 15, .num_segments = 16 },
+	{ .num_queues = 16, .num_segments = 16 },
+};
+
+/* Fill in the KUnit description of a case, computed value included. */
+static void macb_sram_segments_case_desc(const struct macb_sram_segments_case *t,
+					 char *desc)
+{
+	u64 val = gem_sram_distribute_segments(t->num_queues, t->num_segments);
+
+	snprintf(desc, KUNIT_PARAM_DESC_SIZE,
+		 "num_queues=%u num_segments=%u TXQSEGALLOC=%#llx",
+		 t->num_queues, t->num_segments, val);
+}
+
+KUNIT_ARRAY_PARAM(macb_sram_segments,
+		  macb_sram_segments_cases,
+		  macb_sram_segments_case_desc);
+
+static struct kunit_case macb_test_cases[] = {
+	KUNIT_CASE_PARAM(macb_sram_segments_test,
+			 macb_sram_segments_gen_params),
+	{}
+};
+
+static struct kunit_suite macb_test_suite = {
+	.name = "macb",
+	.test_cases = macb_test_cases,
+};
+
+kunit_test_suite(macb_test_suite);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Cadence MACB/GEM Ethernet driver kunit tests");
+MODULE_AUTHOR("Théo Lebrun");
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index dd066f481200..f6c4b31ef75a 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -3053,6 +3053,21 @@ static void macb_init_hw(struct macb *bp)
 	if (bp->caps & MACB_CAPS_JUMBO)
 		bp->rx_frm_len_mask = MACB_RX_JFRMLEN_MASK;
 
+	/*
+	 * Distribute Tx SRAM segments evenly based on active number of queues.
+ */ + if (macb_is_gem(bp)) { + unsigned int num_segments; + u64 val; + + num_segments = 1U << GEM_BFEXT(SEGMENTS_BIT_SIZE, + gem_readl(bp, DCFG6)); + val = gem_sram_distribute_segments(bp->num_queues, + num_segments); + gem_writel(bp, TXQSEGALLOC_LOWER, val); + gem_writel(bp, TXQSEGALLOC_UPPER, val >> 32); + } + macb_configure_dma(bp); /* Enable RX partial store and forward and set watermark */ diff --git a/drivers/net/ethernet/cadence/macb_utils.c b/drivers/net/ethernet/cadence/macb_utils.c new file mode 100644 index 000000000000..77e0b5c1df86 --- /dev/null +++ b/drivers/net/ethernet/cadence/macb_utils.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include "macb.h" + +/* + * Distribute evenly available segments across queues. The computation is + * complex because (1) segments are counted in powers of two and (2) a queue + * can only use up to 8 segments. There are four types of cases: + * - Sharing all segments equally is doable. Take num_queues=4 and + * num_segments=16. Each queue will get 2^2=4 segments. + * - Sharing all segments is doable. Take num_queues=5 and num_segments=16. + * Three queues will get 2^2=4 segments and two will get 2^1=2 segments. + * - Sharing all segments is not doable because not enough queues are + * available. Take num_queues=1 and num_segments=16; queue 0 can only have 8 + * segments. + * - Sharing all segments is not doable because not enough segments are + * available. Take num_queues=4 and num_segments=2. + * + * We start by computing the power each queue will have. For num_queues=5 and + * num_segments=16, each queue will have at least 2^1 segments. That leaves us + * with remaining_segments=6. If we increase the power for a queue, we get a + * delta of 2 (2^2-2^1). The first three queues will therefore be advantaged + * and each have 2^2 segments. The remaining 2 queues will only have 2^1 + * segments. 
+ */
+u64 gem_sram_distribute_segments(unsigned int num_queues,
+				 unsigned int num_segments)
+{
+	unsigned int pow, remaining_segments, i;
+	unsigned int num_advantaged_queues = 0;
+	u64 val = 0;
+
+	/* pow=0 for all queues. Also avoids ilog2(0) and division by zero. */
+	if (!num_queues || num_queues >= num_segments)
+		return 0;
+
+	pow = min(ilog2(num_segments / num_queues), 3);
+	remaining_segments = num_segments - num_queues * (1U << pow);
+
+	/*
+	 * We can only distribute remaining segments if (1) there are remaining
+	 * segments and (2) we did not reach the max segments per queue (2^3).
+	 */
+	if (remaining_segments != 0 && pow != 3) {
+		unsigned int delta = (1U << (pow + 1)) - (1U << pow);
+
+		num_advantaged_queues = remaining_segments / delta;
+	}
+
+	/* Shift as u64: fields of queues 8 and up sit above bit 31. */
+	for (i = 0; i < num_advantaged_queues; i++)
+		val |= (u64)((pow + 1) & 0b11) << (i * 4);
+	for (i = num_advantaged_queues; i < num_queues; i++)
+		val |= (u64)(pow & 0b11) << (i * 4);
+	return val;
+}
-- 
2.53.0