Per the Linux Kernel Memory Model, value-returning atomic RMW operations must provide sequentially consistent ordering (a full memory barrier). On LoongArch, plain AMO instructions and bare ll/sc loops do not satisfy this requirement by themselves. Update emit_atomic_rmw() to emit barrier-carrying instructions for all value-returning BPF atomics: - BPF_FETCH (ADD/AND/OR/XOR): use amadd_db.{b,h,w,d} and am{and,or,xor}_db.{w,d} - BPF_XCHG: use amswap_db.{b,h,w,d} - BPF_CMPXCHG: emit dbar 0x700 after the ll/sc loop, matching __WEAK_LLSC_MB in cmpxchg.h. Add the corresponding instruction encodings and emit helpers to inst.h. Non-value-returning RMW ops (plain BPF_ADD, BPF_AND, etc.) are left as weakly ordered, consistent with LKMM. Signed-off-by: Chenguang Zhao --- arch/loongarch/include/asm/inst.h | 18 +++++++++++++++++ arch/loongarch/net/bpf_jit.c | 32 +++++++++++++++++-------------- 2 files changed, 36 insertions(+), 14 deletions(-) diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h index 76b723590023..bdbc17d07110 100644 --- a/arch/loongarch/include/asm/inst.h +++ b/arch/loongarch/include/asm/inst.h @@ -199,6 +199,10 @@ enum reg3_op { amswaph_op = 0x70b9, amaddb_op = 0x70ba, amaddh_op = 0x70bb, + amswapdbb_op = 0x70bc, + amswapdbh_op = 0x70bd, + amadddbb_op = 0x70be, + amadddbh_op = 0x70bf, amswapw_op = 0x70c0, amswapd_op = 0x70c1, amaddw_op = 0x70c2, @@ -783,6 +787,20 @@ DEF_EMIT_REG3_FORMAT(amswapb, amswapb_op) DEF_EMIT_REG3_FORMAT(amswaph, amswaph_op) DEF_EMIT_REG3_FORMAT(amswapw, amswapw_op) DEF_EMIT_REG3_FORMAT(amswapd, amswapd_op) +DEF_EMIT_REG3_FORMAT(amswapdbb, amswapdbb_op) +DEF_EMIT_REG3_FORMAT(amswapdbh, amswapdbh_op) +DEF_EMIT_REG3_FORMAT(amadddbb, amadddbb_op) +DEF_EMIT_REG3_FORMAT(amadddbh, amadddbh_op) +DEF_EMIT_REG3_FORMAT(amadddbw, amadddbw_op) +DEF_EMIT_REG3_FORMAT(amadddbd, amadddbd_op) +DEF_EMIT_REG3_FORMAT(amanddbw, amanddbw_op) +DEF_EMIT_REG3_FORMAT(amanddbd, amanddbd_op) +DEF_EMIT_REG3_FORMAT(amordbw, amordbw_op) 
+DEF_EMIT_REG3_FORMAT(amordbd, amordbd_op) +DEF_EMIT_REG3_FORMAT(amxordbw, amxordbw_op) +DEF_EMIT_REG3_FORMAT(amxordbd, amxordbd_op) +DEF_EMIT_REG3_FORMAT(amswapdbw, amswapdbw_op) +DEF_EMIT_REG3_FORMAT(amswapdbd, amswapdbd_op) #define DEF_EMIT_REG3SA2_FORMAT(NAME, OP) \ static inline void emit_##NAME(union loongarch_instruction *insn, \ diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index 24913dc7f4e8..47707579e61c 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -7,6 +7,9 @@ #include #include "bpf_jit.h" +/* dbar hint for ll/sc completion ordering, see __WEAK_LLSC_MB */ +#define DBAR_LLSC_MB 0x700 + #define LOONGARCH_MAX_REG_ARGS 8 #define LOONGARCH_LONG_JUMP_NINSNS 5 @@ -418,7 +421,7 @@ static int emit_atomic_rmw(const struct bpf_insn *insn, struct jit_ctx *ctx) pr_err_once("bpf-jit: amadd.b instruction is not supported\n"); return -EINVAL; } - emit_insn(ctx, amaddb, src, t1, t3); + emit_insn(ctx, amadddbb, src, t1, t3); emit_zext_32(ctx, src, true); break; case BPF_H: @@ -426,39 +429,39 @@ static int emit_atomic_rmw(const struct bpf_insn *insn, struct jit_ctx *ctx) pr_err_once("bpf-jit: amadd.h instruction is not supported\n"); return -EINVAL; } - emit_insn(ctx, amaddh, src, t1, t3); + emit_insn(ctx, amadddbh, src, t1, t3); emit_zext_32(ctx, src, true); break; case BPF_W: - emit_insn(ctx, amaddw, src, t1, t3); + emit_insn(ctx, amadddbw, src, t1, t3); emit_zext_32(ctx, src, true); break; case BPF_DW: - emit_insn(ctx, amaddd, src, t1, t3); + emit_insn(ctx, amadddbd, src, t1, t3); break; } break; case BPF_AND | BPF_FETCH: if (isdw) { - emit_insn(ctx, amandd, src, t1, t3); + emit_insn(ctx, amanddbd, src, t1, t3); } else { - emit_insn(ctx, amandw, src, t1, t3); + emit_insn(ctx, amanddbw, src, t1, t3); emit_zext_32(ctx, src, true); } break; case BPF_OR | BPF_FETCH: if (isdw) { - emit_insn(ctx, amord, src, t1, t3); + emit_insn(ctx, amordbd, src, t1, t3); } else { - emit_insn(ctx, amorw, src, t1, t3); + 
emit_insn(ctx, amordbw, src, t1, t3); emit_zext_32(ctx, src, true); } break; case BPF_XOR | BPF_FETCH: if (isdw) { - emit_insn(ctx, amxord, src, t1, t3); + emit_insn(ctx, amxordbd, src, t1, t3); } else { - emit_insn(ctx, amxorw, src, t1, t3); + emit_insn(ctx, amxordbw, src, t1, t3); emit_zext_32(ctx, src, true); } break; @@ -470,7 +473,7 @@ static int emit_atomic_rmw(const struct bpf_insn *insn, struct jit_ctx *ctx) pr_err_once("bpf-jit: amswap.b instruction is not supported\n"); return -EINVAL; } - emit_insn(ctx, amswapb, src, t1, t3); + emit_insn(ctx, amswapdbb, src, t1, t3); emit_zext_32(ctx, src, true); break; case BPF_H: @@ -478,15 +481,15 @@ static int emit_atomic_rmw(const struct bpf_insn *insn, struct jit_ctx *ctx) pr_err_once("bpf-jit: amswap.h instruction is not supported\n"); return -EINVAL; } - emit_insn(ctx, amswaph, src, t1, t3); + emit_insn(ctx, amswapdbh, src, t1, t3); emit_zext_32(ctx, src, true); break; case BPF_W: - emit_insn(ctx, amswapw, src, t1, t3); + emit_insn(ctx, amswapdbw, src, t1, t3); emit_zext_32(ctx, src, true); break; case BPF_DW: - emit_insn(ctx, amswapd, src, t1, t3); + emit_insn(ctx, amswapdbd, src, t1, t3); break; } break; @@ -509,6 +512,7 @@ static int emit_atomic_rmw(const struct bpf_insn *insn, struct jit_ctx *ctx) emit_insn(ctx, beq, t3, LOONGARCH_GPR_ZERO, -6); emit_zext_32(ctx, r0, true); } + emit_insn(ctx, dbar, DBAR_LLSC_MB); break; default: pr_err_once("bpf-jit: invalid atomic read-modify-write opcode %02x\n", imm); -- 2.25.1