The 8 and 16 bit read-modify-write atomic instructions amadd.{b/h} and amswap.{b/h} were newly added in the latest LoongArch Reference Manual, define the instruction format and check whether support via cpucfg. Furthermore, define the instruction format for DBAR which will be used to support BPF load-acquire and store-release instructions. This is preparation for later patch. Signed-off-by: Tiezhu Yang --- arch/loongarch/include/asm/cpu-features.h | 1 + arch/loongarch/include/asm/cpu.h | 2 ++ arch/loongarch/include/asm/inst.h | 10 ++++++++++ arch/loongarch/include/uapi/asm/hwcap.h | 1 + arch/loongarch/kernel/cpu-probe.c | 4 ++++ arch/loongarch/kernel/proc.c | 2 ++ 6 files changed, 20 insertions(+) diff --git a/arch/loongarch/include/asm/cpu-features.h b/arch/loongarch/include/asm/cpu-features.h index 8eefe7a2098b..f9d3188accfc 100644 --- a/arch/loongarch/include/asm/cpu-features.h +++ b/arch/loongarch/include/asm/cpu-features.h @@ -68,5 +68,6 @@ #define cpu_has_msgint cpu_opt(LOONGARCH_CPU_MSGINT) #define cpu_has_avecint cpu_opt(LOONGARCH_CPU_AVECINT) #define cpu_has_redirectint cpu_opt(LOONGARCH_CPU_REDIRECTINT) +#define cpu_has_lam_bh cpu_opt(LOONGARCH_CPU_LAM_BH) #endif /* __ASM_CPU_FEATURES_H */ diff --git a/arch/loongarch/include/asm/cpu.h b/arch/loongarch/include/asm/cpu.h index 1e60ab264cd0..b423b1f41145 100644 --- a/arch/loongarch/include/asm/cpu.h +++ b/arch/loongarch/include/asm/cpu.h @@ -126,6 +126,7 @@ static inline char *id_to_core_name(unsigned int id) #define CPU_FEATURE_MSGINT 30 /* CPU has MSG interrupt */ #define CPU_FEATURE_AVECINT 31 /* CPU has AVEC interrupt */ #define CPU_FEATURE_REDIRECTINT 32 /* CPU has interrupt remapping */ +#define CPU_FEATURE_LAM_BH 33 /* CPU has AM{SWAP/ADD}[_DB].{B/H} instructions */ #define LOONGARCH_CPU_CPUCFG BIT_ULL(CPU_FEATURE_CPUCFG) #define LOONGARCH_CPU_LAM BIT_ULL(CPU_FEATURE_LAM) @@ -160,5 +161,6 @@ static inline char *id_to_core_name(unsigned int id) #define LOONGARCH_CPU_MSGINT BIT_ULL(CPU_FEATURE_MSGINT) #define LOONGARCH_CPU_AVECINT BIT_ULL(CPU_FEATURE_AVECINT) #define LOONGARCH_CPU_REDIRECTINT BIT_ULL(CPU_FEATURE_REDIRECTINT) +#define LOONGARCH_CPU_LAM_BH BIT_ULL(CPU_FEATURE_LAM_BH) #endif /* _ASM_CPU_H */ diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h index f9f207082d0e..76b723590023 100644 --- a/arch/loongarch/include/asm/inst.h +++ b/arch/loongarch/include/asm/inst.h @@ -36,6 +36,7 @@ enum reg0i15_op { break_op = 0x54, + dbar_op = 0x70e4, }; enum reg0i26_op { @@ -194,6 +195,10 @@ enum reg3_op { fstxs_op = 0x7070, fstxd_op = 0x7078, scq_op = 0x70ae, + amswapb_op = 0x70b8, + amswaph_op = 0x70b9, + amaddb_op = 0x70ba, + amaddh_op = 0x70bb, amswapw_op = 0x70c0, amswapd_op = 0x70c1, amaddw_op = 0x70c2, @@ -543,6 +548,7 @@ static inline void emit_##NAME(union loongarch_instruction *insn, \ } DEF_EMIT_REG0I15_FORMAT(break, break_op) +DEF_EMIT_REG0I15_FORMAT(dbar, dbar_op) /* like emit_break(imm) but returns a constant expression */ #define __emit_break(imm) ((u32)((imm) | (break_op << 15))) @@ -763,6 +769,8 @@ DEF_EMIT_REG3_FORMAT(stxb, stxb_op) DEF_EMIT_REG3_FORMAT(stxh, stxh_op) DEF_EMIT_REG3_FORMAT(stxw, stxw_op) DEF_EMIT_REG3_FORMAT(stxd, stxd_op) +DEF_EMIT_REG3_FORMAT(amaddb, amaddb_op) +DEF_EMIT_REG3_FORMAT(amaddh, amaddh_op) DEF_EMIT_REG3_FORMAT(amaddw, amaddw_op) DEF_EMIT_REG3_FORMAT(amaddd, amaddd_op) DEF_EMIT_REG3_FORMAT(amandw, amandw_op) @@ -771,6 +779,8 @@ DEF_EMIT_REG3_FORMAT(amorw, amorw_op) DEF_EMIT_REG3_FORMAT(amord, amord_op) DEF_EMIT_REG3_FORMAT(amxorw, amxorw_op) DEF_EMIT_REG3_FORMAT(amxord, amxord_op) +DEF_EMIT_REG3_FORMAT(amswapb, amswapb_op) +DEF_EMIT_REG3_FORMAT(amswaph, amswaph_op) DEF_EMIT_REG3_FORMAT(amswapw, amswapw_op) DEF_EMIT_REG3_FORMAT(amswapd, amswapd_op) diff --git a/arch/loongarch/include/uapi/asm/hwcap.h b/arch/loongarch/include/uapi/asm/hwcap.h index 49519b4362c6..90e96113ba51 100644 --- a/arch/loongarch/include/uapi/asm/hwcap.h +++ b/arch/loongarch/include/uapi/asm/hwcap.h @@ -19,5 +19,6 @@ #define HWCAP_LOONGARCH_PTW (1 << 13) #define HWCAP_LOONGARCH_LSPW (1 << 14) #define HWCAP_LOONGARCH_SCQ (1 << 15) +#define HWCAP_LOONGARCH_LAM_BH (1 << 16) #endif /* _UAPI_ASM_HWCAP_H */ diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c index 657bbae6c1c7..93466fc7d33d 100644 --- a/arch/loongarch/kernel/cpu-probe.c +++ b/arch/loongarch/kernel/cpu-probe.c @@ -177,6 +177,10 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c) c->options |= LOONGARCH_CPU_LAM; elf_hwcap |= HWCAP_LOONGARCH_LAM; } + if (config & CPUCFG2_LAM_BH) { + c->options |= LOONGARCH_CPU_LAM_BH; + elf_hwcap |= HWCAP_LOONGARCH_LAM_BH; + } if (config & CPUCFG2_SCQ) { c->options |= LOONGARCH_CPU_SCQ; elf_hwcap |= HWCAP_LOONGARCH_SCQ; diff --git a/arch/loongarch/kernel/proc.c b/arch/loongarch/kernel/proc.c index a8127e83da65..d4ce5b585453 100644 --- a/arch/loongarch/kernel/proc.c +++ b/arch/loongarch/kernel/proc.c @@ -64,6 +64,8 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_puts(m, " cpucfg"); if (cpu_has_lam) seq_puts(m, " lam"); + if (cpu_has_lam_bh) + seq_puts(m, " lam_bh"); if (cpu_has_scq) seq_puts(m, " scq"); if (cpu_has_ual) -- 2.42.0 Like the other archs such as x86 and riscv, add the default case in emit_atomic() to print an error message for the invalid opcode and return -EINVAL , then make its return type as int. While at it, given that all of the instructions in emit_atomic() are only read-modify-write instructions, rename emit_atomic() to emit_atomic_rmw() to make it clear, because there will be a new function emit_atomic_ld_st() for load-acquire and store-release instructions in the later patch. Signed-off-by: Tiezhu Yang --- arch/loongarch/net/bpf_jit.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index 9cb796e16379..fefda4050a20 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -344,7 +344,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx, int insn) #undef jmp_offset } -static void emit_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx) +static int emit_atomic_rmw(const struct bpf_insn *insn, struct jit_ctx *ctx) { const u8 t1 = LOONGARCH_GPR_T1; const u8 t2 = LOONGARCH_GPR_T2; @@ -448,7 +448,12 @@ static void emit_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx) emit_zext_32(ctx, r0, true); } break; + default: + pr_err_once("bpf-jit: invalid atomic read-modify-write opcode %02x\n", imm); + return -EINVAL; } + + return 0; } static bool is_signed_bpf_cond(u8 cond) @@ -1256,7 +1261,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext case BPF_STX | BPF_ATOMIC | BPF_W: case BPF_STX | BPF_ATOMIC | BPF_DW: - emit_atomic(insn, ctx); + ret = emit_atomic_rmw(insn, ctx); + if (ret) + return ret; break; /* Speculation barrier */ -- 2.42.0 The 8 and 16 bit read-modify-write instructions {amadd/amswap}.{b/h} were newly added in the latest LoongArch Reference Manual, use them to avoid the error of unknown opcode if possible. Signed-off-by: Tiezhu Yang --- arch/loongarch/net/bpf_jit.c | 83 ++++++++++++++++++++++++++++++++---- 1 file changed, 74 insertions(+), 9 deletions(-) diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index fefda4050a20..c9a32f124f5e 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -363,10 +363,30 @@ static int emit_atomic_rmw(const struct bpf_insn *insn, struct jit_ctx *ctx) switch (imm) { /* lock *(size *)(dst + off) = src */ case BPF_ADD: - if (isdw) - emit_insn(ctx, amaddd, t2, t1, src); - else + switch (BPF_SIZE(insn->code)) { + case BPF_B: + if (cpu_has_lam_bh) { + emit_insn(ctx, amaddb, t2, t1, src); + } else { + pr_err_once("bpf-jit: amadd.b instruction is not supported\n"); + return -EINVAL; + } + break; + case BPF_H: + if (cpu_has_lam_bh) { + emit_insn(ctx, amaddh, t2, t1, src); + } else { + pr_err_once("bpf-jit: amadd.h instruction is not supported\n"); + return -EINVAL; + } + break; + case BPF_W: emit_insn(ctx, amaddw, t2, t1, src); + break; + case BPF_DW: + emit_insn(ctx, amaddd, t2, t1, src); + break; + } break; case BPF_AND: if (isdw) @@ -388,11 +408,32 @@ static int emit_atomic_rmw(const struct bpf_insn *insn, struct jit_ctx *ctx) break; /* src = atomic_fetch_(dst + off, src) */ case BPF_ADD | BPF_FETCH: - if (isdw) { - emit_insn(ctx, amaddd, src, t1, t3); - } else { + switch (BPF_SIZE(insn->code)) { + case BPF_B: + if (cpu_has_lam_bh) { + emit_insn(ctx, amaddb, src, t1, t3); + emit_zext_32(ctx, src, true); + } else { + pr_err_once("bpf-jit: amadd.b instruction is not supported\n"); + return -EINVAL; + } + break; + case BPF_H: + if (cpu_has_lam_bh) { + emit_insn(ctx, amaddh, src, t1, t3); + emit_zext_32(ctx, src, true); + } else { + pr_err_once("bpf-jit: amadd.h instruction is not supported\n"); + return -EINVAL; + } + break; + case BPF_W: emit_insn(ctx, amaddw, src, t1, t3); emit_zext_32(ctx, src, true); + break; + case BPF_DW: + emit_insn(ctx, amaddd, src, t1, t3); + break; } break; case BPF_AND | BPF_FETCH: @@ -421,11 +462,32 @@ static int emit_atomic_rmw(const struct bpf_insn *insn, struct jit_ctx *ctx) break; /* src = atomic_xchg(dst + off, src); */ case BPF_XCHG: - if (isdw) { - emit_insn(ctx, amswapd, src, t1, t3); - } else { + switch (BPF_SIZE(insn->code)) { + case BPF_B: + if (cpu_has_lam_bh) { + emit_insn(ctx, amswapb, src, t1, t3); + emit_zext_32(ctx, src, true); + } else { + pr_err_once("bpf-jit: amswap.b instruction is not supported\n"); + return -EINVAL; + } + break; + case BPF_H: + if (cpu_has_lam_bh) { + emit_insn(ctx, amswaph, src, t1, t3); + emit_zext_32(ctx, src, true); + } else { + pr_err_once("bpf-jit: amswap.h instruction is not supported\n"); + return -EINVAL; + } + break; + case BPF_W: emit_insn(ctx, amswapw, src, t1, t3); emit_zext_32(ctx, src, true); + break; + case BPF_DW: + emit_insn(ctx, amswapd, src, t1, t3); + break; } break; /* r0 = atomic_cmpxchg(dst + off, r0, src); */ @@ -1259,6 +1321,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext return ret; break; + /* Atomics */ + case BPF_STX | BPF_ATOMIC | BPF_B: + case BPF_STX | BPF_ATOMIC | BPF_H: case BPF_STX | BPF_ATOMIC | BPF_W: case BPF_STX | BPF_ATOMIC | BPF_DW: ret = emit_atomic_rmw(insn, ctx); -- 2.42.0 Use the LoongArch common memory access instructions with the barrier dbar to support the BPF load-acquire and store-release instructions. With this patch, the following testcases passed on LoongArch if the macro CAN_USE_LOAD_ACQ_STORE_REL is usable in bpf selftests: sudo ./test_progs -t verifier_load_acquire sudo ./test_progs -t verifier_store_release sudo ./test_progs -t verifier_precision/bpf_load_acquire sudo ./test_progs -t verifier_precision/bpf_store_release sudo ./test_progs -t compute_live_registers/atomic_load_acq_store_rel Signed-off-by: Tiezhu Yang --- arch/loongarch/net/bpf_jit.c | 98 +++++++++++++++++++++++++++++++++++- 1 file changed, 97 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index c9a32f124f5e..805f95cbe798 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -344,6 +344,99 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx, int insn) #undef jmp_offset } +static int emit_atomic_ld_st(const struct bpf_insn *insn, struct jit_ctx *ctx) +{ + const u8 t1 = LOONGARCH_GPR_T1; + const u8 src = regmap[insn->src_reg]; + const u8 dst = regmap[insn->dst_reg]; + const s16 off = insn->off; + const s32 imm = insn->imm; + + switch (imm) { + /* dst_reg = load_acquire(src_reg + off16) */ + case BPF_LOAD_ACQ: + switch (BPF_SIZE(insn->code)) { + case BPF_B: + if (is_signed_imm12(off)) { + emit_insn(ctx, ldbu, dst, src, off); + } else { + move_imm(ctx, t1, off, false); + emit_insn(ctx, ldxbu, dst, src, t1); + } + break; + case BPF_H: + if (is_signed_imm12(off)) { + emit_insn(ctx, ldhu, dst, src, off); + } else { + move_imm(ctx, t1, off, false); + emit_insn(ctx, ldxhu, dst, src, t1); + } + break; + case BPF_W: + if (is_signed_imm12(off)) { + emit_insn(ctx, ldwu, dst, src, off); + } else { + move_imm(ctx, t1, off, false); + emit_insn(ctx, ldxwu, dst, src, t1); + } + break; + case BPF_DW: + if (is_signed_imm12(off)) { + emit_insn(ctx, ldd, dst, src, off); + } else { + move_imm(ctx, t1, off, false); + emit_insn(ctx, ldxd, dst, src, t1); + } + break; + } + emit_insn(ctx, dbar, 0b10100); + break; + /* store_release(dst_reg + off16, src_reg) */ + case BPF_STORE_REL: + emit_insn(ctx, dbar, 0b10010); + switch (BPF_SIZE(insn->code)) { + case BPF_B: + if (is_signed_imm12(off)) { + emit_insn(ctx, stb, src, dst, off); + } else { + move_imm(ctx, t1, off, false); + emit_insn(ctx, stxb, src, dst, t1); + } + break; + case BPF_H: + if (is_signed_imm12(off)) { + emit_insn(ctx, sth, src, dst, off); + } else { + move_imm(ctx, t1, off, false); + emit_insn(ctx, stxh, src, dst, t1); + } + break; + case BPF_W: + if (is_signed_imm12(off)) { + emit_insn(ctx, stw, src, dst, off); + } else { + move_imm(ctx, t1, off, false); + emit_insn(ctx, stxw, src, dst, t1); + } + break; + case BPF_DW: + if (is_signed_imm12(off)) { + emit_insn(ctx, std, src, dst, off); + } else { + move_imm(ctx, t1, off, false); + emit_insn(ctx, stxd, src, dst, t1); + } + break; + } + break; + default: + pr_err_once("bpf-jit: invalid atomic load/store opcode %02x\n", imm); + return -EINVAL; + } + + return 0; +} + static int emit_atomic_rmw(const struct bpf_insn *insn, struct jit_ctx *ctx) { const u8 t1 = LOONGARCH_GPR_T1; @@ -1326,7 +1419,10 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext case BPF_STX | BPF_ATOMIC | BPF_H: case BPF_STX | BPF_ATOMIC | BPF_W: case BPF_STX | BPF_ATOMIC | BPF_DW: - ret = emit_atomic_rmw(insn, ctx); + if (bpf_atomic_is_load_store(insn)) + ret = emit_atomic_ld_st(insn, ctx); + else + ret = emit_atomic_rmw(insn, ctx); if (ret) return ret; break; -- 2.42.0 In order to do the following load-acquire and store-release tests on LoongArch: sudo ./test_progs -t verifier_load_acquire sudo ./test_progs -t verifier_store_release sudo ./test_progs -t verifier_precision/bpf_load_acquire sudo ./test_progs -t verifier_precision/bpf_store_release sudo ./test_progs -t compute_live_registers/atomic_load_acq_store_rel it needs to make CAN_USE_LOAD_ACQ_STORE_REL usable for LoongArch. Signed-off-by: Tiezhu Yang --- tools/testing/selftests/bpf/progs/bpf_misc.h | 4 ++-- tools/testing/selftests/bpf/progs/verifier_precision.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index c9bfbe1bafc1..19f0bf44a9e1 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -257,8 +257,8 @@ #if __clang_major__ >= 18 && defined(ENABLE_ATOMICS_TESTS) && \ (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ - (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) || \ - (defined(__TARGET_ARCH_powerpc)) + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ + defined(__TARGET_ARCH_powerpc) || defined(__TARGET_ARCH_loongarch)) #define CAN_USE_LOAD_ACQ_STORE_REL #endif diff --git a/tools/testing/selftests/bpf/progs/verifier_precision.c b/tools/testing/selftests/bpf/progs/verifier_precision.c index 4794903aec8e..6f325876efdd 100644 --- a/tools/testing/selftests/bpf/progs/verifier_precision.c +++ b/tools/testing/selftests/bpf/progs/verifier_precision.c @@ -75,8 +75,8 @@ __naked int bpf_end_to_be(void) #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ - defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \ - __clang_major__ >= 18 + defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \ + defined(__TARGET_ARCH_loongarch)) && __clang_major__ >= 18 SEC("?raw_tp") __success __log_level(2) -- 2.42.0