Commit af487c57 authored by Daniel Borkmann

Merge branch 'bpf-optimize-neg-sums'

Jakub Kicinski says:

====================
This set adds an optimization run to the NFP jit to turn ADD and SUB
instructions with negative immediate into the opposite operation with
a positive immediate. NFP can fit small immediates into the instructions
but it can't ever fit negative immediates. Addition of small negative
immediates is quite common in BPF programs for stack address calculations,
therefore this optimization gives us non-negligible savings in instruction
count (up to 4%).
====================
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
parents a18fda1a 7bdc97be
......@@ -1214,45 +1214,83 @@ wrp_test_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
return 0;
}
static int
wrp_cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
enum br_mask br_mask, bool swap)
/* Branch mask and operand order for the ordered-compare jump opcodes.
 * The table is indexed by BPF_OP(code) >> 4; @swap selects the reversed
 * operand order when the compare is emitted.  Slots that are not listed
 * are zero-initialized.
 */
static const struct jmp_code_map {
	enum br_mask br_mask;
	bool swap;
} jmp_code_map[] = {
	[BPF_JGT >> 4]	= { .br_mask = BR_BLO, .swap = true },
	[BPF_JGE >> 4]	= { .br_mask = BR_BHS, .swap = false },
	[BPF_JLT >> 4]	= { .br_mask = BR_BLO, .swap = false },
	[BPF_JLE >> 4]	= { .br_mask = BR_BHS, .swap = true },
	[BPF_JSGT >> 4]	= { .br_mask = BR_BLT, .swap = true },
	[BPF_JSGE >> 4]	= { .br_mask = BR_BGE, .swap = false },
	[BPF_JSLT >> 4]	= { .br_mask = BR_BLT, .swap = false },
	[BPF_JSLE >> 4]	= { .br_mask = BR_BGE, .swap = true },
};
/* Look up the jmp_code_map entry for the jump instruction in @meta.
 *
 * Returns a pointer into jmp_code_map, or NULL (after a one-time warning)
 * when the opcode indexes past the table or lands on an unpopulated slot.
 */
static const struct jmp_code_map *nfp_jmp_code_get(struct nfp_insn_meta *meta)
{
	unsigned int idx = BPF_OP(meta->insn.code) >> 4;
	bool missing;

	/* br_mask of 0 is BR_BEQ which we don't use in jump code table */
	missing = idx >= ARRAY_SIZE(jmp_code_map) ||
		  !jmp_code_map[idx].br_mask;
	if (WARN_ONCE(missing, "no code found for jump instruction"))
		return NULL;

	return &jmp_code_map[idx];
}
/* Emit the compare and branch for a jump that tests a register against
 * the instruction's immediate.
 *
 * The branch mask and operand order are taken from the jmp_code_map entry
 * returned by nfp_jmp_code_get().  Returns -EINVAL when no entry is found,
 * 0 otherwise.
 *
 * NOTE(review): the statements below that use a bare `swap`, `br_mask`,
 * ALU_OP_SUB or ALU_OP_SUB_C appear to be the pre-change lines of this
 * hunk shown next to their replacements (neither `swap` nor `br_mask` is
 * declared in this function) - confirm against the real source file.
 */
static int cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
const struct bpf_insn *insn = &meta->insn;
u64 imm = insn->imm; /* sign extend */
const struct jmp_code_map *code;
enum alu_op alu_op, carry_op;
/* reg is compared with the low word of imm, reg + 1 with the high word */
u8 reg = insn->dst_reg * 2;
swreg tmp_reg;
code = nfp_jmp_code_get(meta);
if (!code)
return -EINVAL;
/* jump_neg_op set: use ADD/ADD_C in place of SUB/SUB_C */
alu_op = meta->jump_neg_op ? ALU_OP_ADD : ALU_OP_SUB;
carry_op = meta->jump_neg_op ? ALU_OP_ADD_C : ALU_OP_SUB_C;
/* low 32 bits of the immediate */
tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
if (!swap)
emit_alu(nfp_prog, reg_none(), reg_a(reg), ALU_OP_SUB, tmp_reg);
if (!code->swap)
emit_alu(nfp_prog, reg_none(), reg_a(reg), alu_op, tmp_reg);
else
emit_alu(nfp_prog, reg_none(), tmp_reg, ALU_OP_SUB, reg_a(reg));
emit_alu(nfp_prog, reg_none(), tmp_reg, alu_op, reg_a(reg));
/* high 32 bits of the immediate, using the carry variant of the op */
tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
if (!swap)
if (!code->swap)
emit_alu(nfp_prog, reg_none(),
reg_a(reg + 1), ALU_OP_SUB_C, tmp_reg);
reg_a(reg + 1), carry_op, tmp_reg);
else
emit_alu(nfp_prog, reg_none(),
tmp_reg, ALU_OP_SUB_C, reg_a(reg + 1));
tmp_reg, carry_op, reg_a(reg + 1));
emit_br(nfp_prog, br_mask, insn->off, 0);
/* branch with the mask from the table and the instruction's offset */
emit_br(nfp_prog, code->br_mask, insn->off, 0);
return 0;
}
static int
wrp_cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
enum br_mask br_mask, bool swap)
static int cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
const struct bpf_insn *insn = &meta->insn;
const struct jmp_code_map *code;
u8 areg, breg;
code = nfp_jmp_code_get(meta);
if (!code)
return -EINVAL;
areg = insn->dst_reg * 2;
breg = insn->src_reg * 2;
if (swap) {
if (code->swap) {
areg ^= breg;
breg ^= areg;
areg ^= breg;
......@@ -1261,7 +1299,7 @@ wrp_cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
emit_alu(nfp_prog, reg_none(), reg_a(areg), ALU_OP_SUB, reg_b(breg));
emit_alu(nfp_prog, reg_none(),
reg_a(areg + 1), ALU_OP_SUB_C, reg_b(breg + 1));
emit_br(nfp_prog, br_mask, insn->off, 0);
emit_br(nfp_prog, code->br_mask, insn->off, 0);
return 0;
}
......@@ -2283,46 +2321,6 @@ static int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
return 0;
}
/* Per-opcode wrappers for compare-with-immediate jumps.  Each forwards to
 * wrp_cmp_imm() with a fixed branch mask and operand-swap flag and returns
 * its result.
 */

/* JGT, immediate: BR_BLO with operands swapped */
static int jgt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
return wrp_cmp_imm(nfp_prog, meta, BR_BLO, true);
}
/* JGE, immediate: BR_BHS, operands not swapped */
static int jge_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
return wrp_cmp_imm(nfp_prog, meta, BR_BHS, false);
}
/* JLT, immediate: BR_BLO, operands not swapped */
static int jlt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
return wrp_cmp_imm(nfp_prog, meta, BR_BLO, false);
}
/* JLE, immediate: BR_BHS with operands swapped */
static int jle_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
return wrp_cmp_imm(nfp_prog, meta, BR_BHS, true);
}
/* JSGT, immediate: BR_BLT with operands swapped */
static int jsgt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
return wrp_cmp_imm(nfp_prog, meta, BR_BLT, true);
}
/* JSGE, immediate: BR_BGE, operands not swapped */
static int jsge_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
return wrp_cmp_imm(nfp_prog, meta, BR_BGE, false);
}
/* JSLT, immediate: BR_BLT, operands not swapped */
static int jslt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
return wrp_cmp_imm(nfp_prog, meta, BR_BLT, false);
}
/* JSLE, immediate: BR_BGE with operands swapped */
static int jsle_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
return wrp_cmp_imm(nfp_prog, meta, BR_BGE, true);
}
static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
const struct bpf_insn *insn = &meta->insn;
......@@ -2392,46 +2390,6 @@ static int jeq_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
return 0;
}
/* Per-opcode wrappers for register-register compare jumps.  Each forwards
 * to wrp_cmp_reg() with a fixed branch mask and operand-swap flag and
 * returns its result.
 */

/* JGT, register: BR_BLO with operands swapped */
static int jgt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
return wrp_cmp_reg(nfp_prog, meta, BR_BLO, true);
}
/* JGE, register: BR_BHS, operands not swapped */
static int jge_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
return wrp_cmp_reg(nfp_prog, meta, BR_BHS, false);
}
/* JLT, register: BR_BLO, operands not swapped */
static int jlt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
return wrp_cmp_reg(nfp_prog, meta, BR_BLO, false);
}
/* JLE, register: BR_BHS with operands swapped */
static int jle_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
return wrp_cmp_reg(nfp_prog, meta, BR_BHS, true);
}
/* JSGT, register: BR_BLT with operands swapped */
static int jsgt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
return wrp_cmp_reg(nfp_prog, meta, BR_BLT, true);
}
/* JSGE, register: BR_BGE, operands not swapped */
static int jsge_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
return wrp_cmp_reg(nfp_prog, meta, BR_BGE, false);
}
/* JSLT, register: BR_BLT, operands not swapped */
static int jslt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
return wrp_cmp_reg(nfp_prog, meta, BR_BLT, false);
}
/* JSLE, register: BR_BGE with operands swapped */
static int jsle_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
return wrp_cmp_reg(nfp_prog, meta, BR_BGE, true);
}
static int jset_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
return wrp_test_reg(nfp_prog, meta, ALU_OP_AND, BR_BNE);
......@@ -2520,25 +2478,25 @@ static const instr_cb_t instr_cb[256] = {
[BPF_ST | BPF_MEM | BPF_DW] = mem_st8,
[BPF_JMP | BPF_JA | BPF_K] = jump,
[BPF_JMP | BPF_JEQ | BPF_K] = jeq_imm,
[BPF_JMP | BPF_JGT | BPF_K] = jgt_imm,
[BPF_JMP | BPF_JGE | BPF_K] = jge_imm,
[BPF_JMP | BPF_JLT | BPF_K] = jlt_imm,
[BPF_JMP | BPF_JLE | BPF_K] = jle_imm,
[BPF_JMP | BPF_JSGT | BPF_K] = jsgt_imm,
[BPF_JMP | BPF_JSGE | BPF_K] = jsge_imm,
[BPF_JMP | BPF_JSLT | BPF_K] = jslt_imm,
[BPF_JMP | BPF_JSLE | BPF_K] = jsle_imm,
[BPF_JMP | BPF_JGT | BPF_K] = cmp_imm,
[BPF_JMP | BPF_JGE | BPF_K] = cmp_imm,
[BPF_JMP | BPF_JLT | BPF_K] = cmp_imm,
[BPF_JMP | BPF_JLE | BPF_K] = cmp_imm,
[BPF_JMP | BPF_JSGT | BPF_K] = cmp_imm,
[BPF_JMP | BPF_JSGE | BPF_K] = cmp_imm,
[BPF_JMP | BPF_JSLT | BPF_K] = cmp_imm,
[BPF_JMP | BPF_JSLE | BPF_K] = cmp_imm,
[BPF_JMP | BPF_JSET | BPF_K] = jset_imm,
[BPF_JMP | BPF_JNE | BPF_K] = jne_imm,
[BPF_JMP | BPF_JEQ | BPF_X] = jeq_reg,
[BPF_JMP | BPF_JGT | BPF_X] = jgt_reg,
[BPF_JMP | BPF_JGE | BPF_X] = jge_reg,
[BPF_JMP | BPF_JLT | BPF_X] = jlt_reg,
[BPF_JMP | BPF_JLE | BPF_X] = jle_reg,
[BPF_JMP | BPF_JSGT | BPF_X] = jsgt_reg,
[BPF_JMP | BPF_JSGE | BPF_X] = jsge_reg,
[BPF_JMP | BPF_JSLT | BPF_X] = jslt_reg,
[BPF_JMP | BPF_JSLE | BPF_X] = jsle_reg,
[BPF_JMP | BPF_JGT | BPF_X] = cmp_reg,
[BPF_JMP | BPF_JGE | BPF_X] = cmp_reg,
[BPF_JMP | BPF_JLT | BPF_X] = cmp_reg,
[BPF_JMP | BPF_JLE | BPF_X] = cmp_reg,
[BPF_JMP | BPF_JSGT | BPF_X] = cmp_reg,
[BPF_JMP | BPF_JSGE | BPF_X] = cmp_reg,
[BPF_JMP | BPF_JSLT | BPF_X] = cmp_reg,
[BPF_JMP | BPF_JSLE | BPF_X] = cmp_reg,
[BPF_JMP | BPF_JSET | BPF_X] = jset_reg,
[BPF_JMP | BPF_JNE | BPF_X] = jne_reg,
[BPF_JMP | BPF_CALL] = call,
......@@ -2777,6 +2735,54 @@ static void nfp_bpf_opt_reg_init(struct nfp_prog *nfp_prog)
}
}
/* abs(insn.imm) will fit better into unrestricted reg immediate -
 * convert add/sub of a negative number into a sub/add of a positive one.
 *
 * Walks every instruction of @nfp_prog that is not marked skip and has a
 * negative constant (BPF_K) operand:
 *  - ALU/ALU64 ADD and SUB are exchanged for each other;
 *  - JMP JGE/JSGE/JLT/JSLT get meta->jump_neg_op set;
 * and in both cases the stored immediate is negated.  Every other
 * instruction is left untouched.
 */
static void nfp_bpf_opt_neg_add_sub(struct nfp_prog *nfp_prog)
{
	struct nfp_insn_meta *meta;

	list_for_each_entry(meta, &nfp_prog->insns, l) {
		/* Work on a copy; meta->insn is only written once we know
		 * the instruction qualifies.
		 */
		struct bpf_insn insn = meta->insn;

		if (meta->skip)
			continue;

		if (BPF_CLASS(insn.code) != BPF_ALU &&
		    BPF_CLASS(insn.code) != BPF_ALU64 &&
		    BPF_CLASS(insn.code) != BPF_JMP)
			continue;
		if (BPF_SRC(insn.code) != BPF_K)
			continue;
		if (insn.imm >= 0)
			continue;
		/* The most negative value of the 32-bit immediate has no
		 * positive counterpart: negating it overflows and the result
		 * is still negative, so the flipped operation would be given
		 * the wrong constant.  Leave such instructions as they are.
		 */
		if (insn.imm < -0x7fffffff)
			continue;

		if (BPF_CLASS(insn.code) == BPF_JMP) {
			switch (BPF_OP(insn.code)) {
			case BPF_JGE:
			case BPF_JSGE:
			case BPF_JLT:
			case BPF_JSLT:
				/* compare is emitted with ADD instead of SUB */
				meta->jump_neg_op = true;
				break;
			default:
				continue;
			}
		} else {
			if (BPF_OP(insn.code) == BPF_ADD)
				insn.code = BPF_CLASS(insn.code) | BPF_SUB;
			else if (BPF_OP(insn.code) == BPF_SUB)
				insn.code = BPF_CLASS(insn.code) | BPF_ADD;
			else
				continue;

			/* the source bits were dropped above; restore BPF_K */
			meta->insn.code = insn.code | BPF_K;
		}

		meta->insn.imm = -insn.imm;
	}
}
/* Remove masking after load since our load guarantees this is not needed */
static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog)
{
......@@ -3212,6 +3218,7 @@ static int nfp_bpf_optimize(struct nfp_prog *nfp_prog)
{
nfp_bpf_opt_reg_init(nfp_prog);
nfp_bpf_opt_neg_add_sub(nfp_prog);
nfp_bpf_opt_ld_mask(nfp_prog);
nfp_bpf_opt_ld_shift(nfp_prog);
nfp_bpf_opt_ldst_gather(nfp_prog);
......
......@@ -236,6 +236,7 @@ struct nfp_bpf_reg_state {
* @xadd_over_16bit: 16bit immediate is not guaranteed
* @xadd_maybe_16bit: 16bit immediate is possible
* @jmp_dst: destination info for jump instructions
* @jump_neg_op: jump instruction has inverted immediate, use ADD instead of SUB
* @func_id: function id for call instructions
* @arg1: arg1 for call instructions
* @arg2: arg2 for call instructions
......@@ -264,7 +265,10 @@ struct nfp_insn_meta {
bool xadd_maybe_16bit;
};
/* jump */
struct {
struct nfp_insn_meta *jmp_dst;
bool jump_neg_op;
};
/* function calls */
struct {
u32 func_id;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment