Commit 92b31a9a authored by Daniel Borkmann's avatar Daniel Borkmann Committed by David S. Miller

bpf: add BPF_J{LT,LE,SLT,SLE} instructions

Currently, eBPF only understands BPF_JGT (>), BPF_JGE (>=),
BPF_JSGT (s>), BPF_JSGE (s>=) instructions, this means that
particularly *JLT/*JLE counterparts involving immediates need
to be rewritten from e.g. X < [IMM] by swapping arguments into
[IMM] > X, meaning the immediate first is required to be loaded
into a register Y := [IMM], such that then we can compare with
Y > X. Note that the destination operand is always required to
be a register.

This has the downside of having unnecessarily increased register
pressure, meaning complex program would need to spill other
registers temporarily to stack in order to obtain an unused
register for the [IMM]. Loading to registers will thus also
affect state pruning since we need to account for that register
use and potentially those registers that had to be spilled/filled
again. As a consequence slightly more stack space might have
been used due to spilling, and BPF programs are a bit longer
due to extra code involving the register load and potentially
required spill/fills.

Thus, add BPF_JLT (<), BPF_JLE (<=), BPF_JSLT (s<), BPF_JSLE (s<=)
counterparts to the eBPF instruction set. Modifying LLVM to
remove the NegateCC() workaround in a PoC patch at [1] and
allowing it to also emit the new instructions resulted in
cilium's BPF programs that are injected into the fast-path to
have a reduced program length in the range of 2-3% (e.g.
accumulated main and tail call sections from one of the object
file reduced from 4864 to 4729 insns), reduced complexity in
the range of 10-30% (e.g. accumulated sections reduced in one
of the cases from 116432 to 88428 insns), and reduced stack
usage in the range of 1-5% (e.g. accumulated sections from one
of the object files reduced from 824 to 784b).

The modification for LLVM will be incorporated in a backwards
compatible way. Plan is for LLVM to have i) a target specific
option to offer a possibility to explicitly enable the extension
by the user (as we have with -m target specific extensions today
for various CPU insns), and ii) have the kernel checked for
presence of the extensions and enable them transparently when
the user is selecting more aggressive options such as -march=native
in a bpf target context. (Other frontends generating BPF byte
code, e.g. ply can probe the kernel directly for its code
generation.)

  [1] https://github.com/borkmann/llvm/tree/bpf-insnsSigned-off-by: default avatarDaniel Borkmann <daniel@iogearbox.net>
Acked-by: default avatarAlexei Starovoitov <ast@kernel.org>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 0bdf7101
...@@ -906,6 +906,10 @@ If BPF_CLASS(code) == BPF_JMP, BPF_OP(code) is one of: ...@@ -906,6 +906,10 @@ If BPF_CLASS(code) == BPF_JMP, BPF_OP(code) is one of:
BPF_JSGE 0x70 /* eBPF only: signed '>=' */ BPF_JSGE 0x70 /* eBPF only: signed '>=' */
BPF_CALL 0x80 /* eBPF only: function call */ BPF_CALL 0x80 /* eBPF only: function call */
BPF_EXIT 0x90 /* eBPF only: function return */ BPF_EXIT 0x90 /* eBPF only: function return */
BPF_JLT 0xa0 /* eBPF only: unsigned '<' */
BPF_JLE 0xb0 /* eBPF only: unsigned '<=' */
BPF_JSLT 0xc0 /* eBPF only: signed '<' */
BPF_JSLE 0xd0 /* eBPF only: signed '<=' */
So BPF_ADD | BPF_X | BPF_ALU means 32-bit addition in both classic BPF So BPF_ADD | BPF_X | BPF_ALU means 32-bit addition in both classic BPF
and eBPF. There are only two registers in classic BPF, so it means A += X. and eBPF. There are only two registers in classic BPF, so it means A += X.
......
...@@ -30,9 +30,14 @@ ...@@ -30,9 +30,14 @@
#define BPF_FROM_LE BPF_TO_LE #define BPF_FROM_LE BPF_TO_LE
#define BPF_FROM_BE BPF_TO_BE #define BPF_FROM_BE BPF_TO_BE
/* jmp encodings */
#define BPF_JNE 0x50 /* jump != */ #define BPF_JNE 0x50 /* jump != */
#define BPF_JLT 0xa0 /* LT is unsigned, '<' */
#define BPF_JLE 0xb0 /* LE is unsigned, '<=' */
#define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */ #define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */
#define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */ #define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */
#define BPF_JSLT 0xc0 /* SLT is signed, '<' */
#define BPF_JSLE 0xd0 /* SLE is signed, '<=' */
#define BPF_CALL 0x80 /* function call */ #define BPF_CALL 0x80 /* function call */
#define BPF_EXIT 0x90 /* function return */ #define BPF_EXIT 0x90 /* function return */
......
...@@ -595,9 +595,13 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from, ...@@ -595,9 +595,13 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from,
case BPF_JMP | BPF_JEQ | BPF_K: case BPF_JMP | BPF_JEQ | BPF_K:
case BPF_JMP | BPF_JNE | BPF_K: case BPF_JMP | BPF_JNE | BPF_K:
case BPF_JMP | BPF_JGT | BPF_K: case BPF_JMP | BPF_JGT | BPF_K:
case BPF_JMP | BPF_JLT | BPF_K:
case BPF_JMP | BPF_JGE | BPF_K: case BPF_JMP | BPF_JGE | BPF_K:
case BPF_JMP | BPF_JLE | BPF_K:
case BPF_JMP | BPF_JSGT | BPF_K: case BPF_JMP | BPF_JSGT | BPF_K:
case BPF_JMP | BPF_JSLT | BPF_K:
case BPF_JMP | BPF_JSGE | BPF_K: case BPF_JMP | BPF_JSGE | BPF_K:
case BPF_JMP | BPF_JSLE | BPF_K:
case BPF_JMP | BPF_JSET | BPF_K: case BPF_JMP | BPF_JSET | BPF_K:
/* Accommodate for extra offset in case of a backjump. */ /* Accommodate for extra offset in case of a backjump. */
off = from->off; off = from->off;
...@@ -833,12 +837,20 @@ static unsigned int ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, ...@@ -833,12 +837,20 @@ static unsigned int ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn,
[BPF_JMP | BPF_JNE | BPF_K] = &&JMP_JNE_K, [BPF_JMP | BPF_JNE | BPF_K] = &&JMP_JNE_K,
[BPF_JMP | BPF_JGT | BPF_X] = &&JMP_JGT_X, [BPF_JMP | BPF_JGT | BPF_X] = &&JMP_JGT_X,
[BPF_JMP | BPF_JGT | BPF_K] = &&JMP_JGT_K, [BPF_JMP | BPF_JGT | BPF_K] = &&JMP_JGT_K,
[BPF_JMP | BPF_JLT | BPF_X] = &&JMP_JLT_X,
[BPF_JMP | BPF_JLT | BPF_K] = &&JMP_JLT_K,
[BPF_JMP | BPF_JGE | BPF_X] = &&JMP_JGE_X, [BPF_JMP | BPF_JGE | BPF_X] = &&JMP_JGE_X,
[BPF_JMP | BPF_JGE | BPF_K] = &&JMP_JGE_K, [BPF_JMP | BPF_JGE | BPF_K] = &&JMP_JGE_K,
[BPF_JMP | BPF_JLE | BPF_X] = &&JMP_JLE_X,
[BPF_JMP | BPF_JLE | BPF_K] = &&JMP_JLE_K,
[BPF_JMP | BPF_JSGT | BPF_X] = &&JMP_JSGT_X, [BPF_JMP | BPF_JSGT | BPF_X] = &&JMP_JSGT_X,
[BPF_JMP | BPF_JSGT | BPF_K] = &&JMP_JSGT_K, [BPF_JMP | BPF_JSGT | BPF_K] = &&JMP_JSGT_K,
[BPF_JMP | BPF_JSLT | BPF_X] = &&JMP_JSLT_X,
[BPF_JMP | BPF_JSLT | BPF_K] = &&JMP_JSLT_K,
[BPF_JMP | BPF_JSGE | BPF_X] = &&JMP_JSGE_X, [BPF_JMP | BPF_JSGE | BPF_X] = &&JMP_JSGE_X,
[BPF_JMP | BPF_JSGE | BPF_K] = &&JMP_JSGE_K, [BPF_JMP | BPF_JSGE | BPF_K] = &&JMP_JSGE_K,
[BPF_JMP | BPF_JSLE | BPF_X] = &&JMP_JSLE_X,
[BPF_JMP | BPF_JSLE | BPF_K] = &&JMP_JSLE_K,
[BPF_JMP | BPF_JSET | BPF_X] = &&JMP_JSET_X, [BPF_JMP | BPF_JSET | BPF_X] = &&JMP_JSET_X,
[BPF_JMP | BPF_JSET | BPF_K] = &&JMP_JSET_K, [BPF_JMP | BPF_JSET | BPF_K] = &&JMP_JSET_K,
/* Program return */ /* Program return */
...@@ -1073,6 +1085,18 @@ static unsigned int ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, ...@@ -1073,6 +1085,18 @@ static unsigned int ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn,
CONT_JMP; CONT_JMP;
} }
CONT; CONT;
JMP_JLT_X:
if (DST < SRC) {
insn += insn->off;
CONT_JMP;
}
CONT;
JMP_JLT_K:
if (DST < IMM) {
insn += insn->off;
CONT_JMP;
}
CONT;
JMP_JGE_X: JMP_JGE_X:
if (DST >= SRC) { if (DST >= SRC) {
insn += insn->off; insn += insn->off;
...@@ -1085,6 +1109,18 @@ static unsigned int ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, ...@@ -1085,6 +1109,18 @@ static unsigned int ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn,
CONT_JMP; CONT_JMP;
} }
CONT; CONT;
JMP_JLE_X:
if (DST <= SRC) {
insn += insn->off;
CONT_JMP;
}
CONT;
JMP_JLE_K:
if (DST <= IMM) {
insn += insn->off;
CONT_JMP;
}
CONT;
JMP_JSGT_X: JMP_JSGT_X:
if (((s64) DST) > ((s64) SRC)) { if (((s64) DST) > ((s64) SRC)) {
insn += insn->off; insn += insn->off;
...@@ -1097,6 +1133,18 @@ static unsigned int ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, ...@@ -1097,6 +1133,18 @@ static unsigned int ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn,
CONT_JMP; CONT_JMP;
} }
CONT; CONT;
JMP_JSLT_X:
if (((s64) DST) < ((s64) SRC)) {
insn += insn->off;
CONT_JMP;
}
CONT;
JMP_JSLT_K:
if (((s64) DST) < ((s64) IMM)) {
insn += insn->off;
CONT_JMP;
}
CONT;
JMP_JSGE_X: JMP_JSGE_X:
if (((s64) DST) >= ((s64) SRC)) { if (((s64) DST) >= ((s64) SRC)) {
insn += insn->off; insn += insn->off;
...@@ -1109,6 +1157,18 @@ static unsigned int ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, ...@@ -1109,6 +1157,18 @@ static unsigned int ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn,
CONT_JMP; CONT_JMP;
} }
CONT; CONT;
JMP_JSLE_X:
if (((s64) DST) <= ((s64) SRC)) {
insn += insn->off;
CONT_JMP;
}
CONT;
JMP_JSLE_K:
if (((s64) DST) <= ((s64) IMM)) {
insn += insn->off;
CONT_JMP;
}
CONT;
JMP_JSET_X: JMP_JSET_X:
if (DST & SRC) { if (DST & SRC) {
insn += insn->off; insn += insn->off;
......
This diff is collapsed.
...@@ -514,14 +514,27 @@ static int bpf_convert_filter(struct sock_filter *prog, int len, ...@@ -514,14 +514,27 @@ static int bpf_convert_filter(struct sock_filter *prog, int len,
break; break;
} }
/* Convert JEQ into JNE when 'jump_true' is next insn. */ /* Convert some jumps when 'jump_true' is next insn. */
if (fp->jt == 0 && BPF_OP(fp->code) == BPF_JEQ) { if (fp->jt == 0) {
switch (BPF_OP(fp->code)) {
case BPF_JEQ:
insn->code = BPF_JMP | BPF_JNE | bpf_src; insn->code = BPF_JMP | BPF_JNE | bpf_src;
break;
case BPF_JGT:
insn->code = BPF_JMP | BPF_JLE | bpf_src;
break;
case BPF_JGE:
insn->code = BPF_JMP | BPF_JLT | bpf_src;
break;
default:
goto jmp_rest;
}
target = i + fp->jf + 1; target = i + fp->jf + 1;
BPF_EMIT_JMP; BPF_EMIT_JMP;
break; break;
} }
jmp_rest:
/* Other jumps are mapped into two insns: Jxx and JA. */ /* Other jumps are mapped into two insns: Jxx and JA. */
target = i + fp->jt + 1; target = i + fp->jt + 1;
insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src; insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
......
...@@ -30,9 +30,14 @@ ...@@ -30,9 +30,14 @@
#define BPF_FROM_LE BPF_TO_LE #define BPF_FROM_LE BPF_TO_LE
#define BPF_FROM_BE BPF_TO_BE #define BPF_FROM_BE BPF_TO_BE
/* jmp encodings */
#define BPF_JNE 0x50 /* jump != */ #define BPF_JNE 0x50 /* jump != */
#define BPF_JLT 0xa0 /* LT is unsigned, '<' */
#define BPF_JLE 0xb0 /* LE is unsigned, '<=' */
#define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */ #define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */
#define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */ #define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */
#define BPF_JSLT 0xc0 /* SLT is signed, '<' */
#define BPF_JSLE 0xd0 /* SLE is signed, '<=' */
#define BPF_CALL 0x80 /* function call */ #define BPF_CALL 0x80 /* function call */
#define BPF_EXIT 0x90 /* function return */ #define BPF_EXIT 0x90 /* function return */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment