author     jc_gargma <jc_gargma@iserlohn-fortress.net>	2020-04-02 15:08:12 -0700
committer  jc_gargma <jc_gargma@iserlohn-fortress.net>	2020-04-02 15:08:12 -0700
commit     a2e16ccece383efc399d34717c865d034a863b89 (patch)
tree       5fdf5c79e39a96fbe1821ba7d1fb6d446fdebd29 /0003-bpf-Undo-incorrect-__reg_bound_offset32-handling.patch
parent     Rebuild with bpf hotfix (diff)
download   linux-libre-a2e16ccece383efc399d34717c865d034a863b89.tar.xz
Updated to 5.6.0
Diffstat (limited to '0003-bpf-Undo-incorrect-__reg_bound_offset32-handling.patch')
-rw-r--r--	0003-bpf-Undo-incorrect-__reg_bound_offset32-handling.patch	277
1 file changed, 277 insertions, 0 deletions
diff --git a/0003-bpf-Undo-incorrect-__reg_bound_offset32-handling.patch b/0003-bpf-Undo-incorrect-__reg_bound_offset32-handling.patch
new file mode 100644
index 0000000..590e256
--- /dev/null
+++ b/0003-bpf-Undo-incorrect-__reg_bound_offset32-handling.patch
@@ -0,0 +1,277 @@
+From e04420186bb7d71b7a6237f36de4d9a75e83f56d Mon Sep 17 00:00:00 2001
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Mon, 30 Mar 2020 18:03:22 +0200
+Subject: bpf: Undo incorrect __reg_bound_offset32 handling
+
+commit f2d67fec0b43edce8c416101cdc52e71145b5fef upstream.
+
+Anatoly has been fuzzing with the kBdysch harness and reported a hang in
+one of the outcomes:
+
+  0: (b7) r0 = 808464432
+  1: (7f) r0 >>= r0
+  2: (14) w0 -= 808464432
+  3: (07) r0 += 808464432
+  4: (b7) r1 = 808464432
+  5: (de) if w1 s<= w0 goto pc+0
+  R0_w=invP(id=0,umin_value=808464432,umax_value=5103431727,var_off=(0x30303020;0x10000001f)) R1_w=invP808464432 R10=fp0
+  6: (07) r0 += -2144337872
+  7: (14) w0 -= -1607454672
+  8: (25) if r0 > 0x30303030 goto pc+0
+  R0_w=invP(id=0,umin_value=271581184,umax_value=271581311,var_off=(0x10300000;0x7f)) R1_w=invP808464432 R10=fp0
+  9: (76) if w0 s>= 0x303030 goto pc+2
+  12: (95) exit
+
+  from 8 to 9: safe
+
+  from 5 to 6: R0_w=invP(id=0,umin_value=808464432,umax_value=5103431727,var_off=(0x30303020;0x10000001f)) R1_w=invP808464432 R10=fp0
+  6: (07) r0 += -2144337872
+  7: (14) w0 -= -1607454672
+  8: (25) if r0 > 0x30303030 goto pc+0
+  R0_w=invP(id=0,umin_value=271581184,umax_value=271581311,var_off=(0x10300000;0x7f)) R1_w=invP808464432 R10=fp0
+  9: safe
+
+  from 8 to 9: safe
+  verification time 589 usec
+  stack depth 0
+  processed 17 insns (limit 1000000) [...]
+
+The underlying program was xlated as follows:
+
+  # bpftool p d x i 9
+   0: (b7) r0 = 808464432
+   1: (7f) r0 >>= r0
+   2: (14) w0 -= 808464432
+   3: (07) r0 += 808464432
+   4: (b7) r1 = 808464432
+   5: (de) if w1 s<= w0 goto pc+0
+   6: (07) r0 += -2144337872
+   7: (14) w0 -= -1607454672
+   8: (25) if r0 > 0x30303030 goto pc+0
+   9: (76) if w0 s>= 0x303030 goto pc+2
+  10: (05) goto pc-1
+  11: (05) goto pc-1
+  12: (95) exit
+
+The verifier rewrote the original instructions it recognized as dead code
+with 'goto pc-1', but reality differs from the verifier's simulation in
+that we're actually able to trigger a hang by hitting the 'goto pc-1'
+instructions.
+
+Taking a different example to make the issue more obvious: here we're
+probing bounds on a completely unknown scalar variable in r1:
+
+  [...]
+  5: R0_w=inv1 R1_w=inv(id=0) R10=fp0
+  5: (18) r2 = 0x4000000000
+  7: R0_w=inv1 R1_w=inv(id=0) R2_w=inv274877906944 R10=fp0
+  7: (18) r3 = 0x2000000000
+  9: R0_w=inv1 R1_w=inv(id=0) R2_w=inv274877906944 R3_w=inv137438953472 R10=fp0
+  9: (18) r4 = 0x400
+  11: R0_w=inv1 R1_w=inv(id=0) R2_w=inv274877906944 R3_w=inv137438953472 R4_w=inv1024 R10=fp0
+  11: (18) r5 = 0x200
+  13: R0_w=inv1 R1_w=inv(id=0) R2_w=inv274877906944 R3_w=inv137438953472 R4_w=inv1024 R5_w=inv512 R10=fp0
+  13: (2d) if r1 > r2 goto pc+4
+  R0_w=inv1 R1_w=inv(id=0,umax_value=274877906944,var_off=(0x0; 0x7fffffffff)) R2_w=inv274877906944 R3_w=inv137438953472 R4_w=inv1024 R5_w=inv512 R10=fp0
+  14: R0_w=inv1 R1_w=inv(id=0,umax_value=274877906944,var_off=(0x0; 0x7fffffffff)) R2_w=inv274877906944 R3_w=inv137438953472 R4_w=inv1024 R5_w=inv512 R10=fp0
+  14: (ad) if r1 < r3 goto pc+3
+  R0_w=inv1 R1_w=inv(id=0,umin_value=137438953472,umax_value=274877906944,var_off=(0x0; 0x7fffffffff)) R2_w=inv274877906944 R3_w=inv137438953472 R4_w=inv1024 R5_w=inv512 R10=fp0
+  15: R0=inv1 R1=inv(id=0,umin_value=137438953472,umax_value=274877906944,var_off=(0x0; 0x7fffffffff)) R2=inv274877906944 R3=inv137438953472 R4=inv1024 R5=inv512 R10=fp0
+  15: (2e) if w1 > w4 goto pc+2
+  R0=inv1 R1=inv(id=0,umin_value=137438953472,umax_value=274877906944,var_off=(0x0; 0x7f00000000)) R2=inv274877906944 R3=inv137438953472 R4=inv1024 R5=inv512 R10=fp0
+  16: R0=inv1 R1=inv(id=0,umin_value=137438953472,umax_value=274877906944,var_off=(0x0; 0x7f00000000)) R2=inv274877906944 R3=inv137438953472 R4=inv1024 R5=inv512 R10=fp0
+  16: (ae) if w1 < w5 goto pc+1
+  R0=inv1 R1=inv(id=0,umin_value=137438953472,umax_value=274877906944,var_off=(0x0; 0x7f00000000)) R2=inv274877906944 R3=inv137438953472 R4=inv1024 R5=inv512 R10=fp0
+  [...]
+
+We first probe lower/upper bounds via jmp64; later we do a similar check
+via jmp32 and examine the resulting var_off there. After the fall-through
+in insn 14, we get the following bounded r1, with 0x7fffffffff bits
+marked unknown in the variable section.
+
+Thus, after knowing r1 <= 0x4000000000 and r1 >= 0x2000000000:
+
+  max: 0b100000000000000000000000000000000000000 / 0x4000000000
+  var: 0b111111111111111111111111111111111111111 / 0x7fffffffff
+  min: 0b010000000000000000000000000000000000000 / 0x2000000000
+
+Now, in insns 15 and 16, we perform a similar probe with lower/upper
+bounds in jmp32.
+
+Thus, after knowing r1 <= 0x4000000000 and r1 >= 0x2000000000 and
+                    w1 <= 0x400        and w1 >= 0x200:
+
+  max: 0b100000000000000000000000000000000000000 / 0x4000000000
+  var: 0b111111100000000000000000000000000000000 / 0x7f00000000
+  min: 0b010000000000000000000000000000000000000 / 0x2000000000
+
+The lower/upper bounds haven't changed since they have high bits set in
+u64 space and the jmp32 tests can only refine bounds in the low bits.
+
+However, for the var part the expectation would have been 0x7f000007ff
+or something less precise up to 0x7fffffffff. An outcome of 0x7f00000000
+is not correct since it would contradict the earlier probed bounds,
+where we know that the result should have been in [0x200,0x400] in u32
+space. Therefore, tests with such info will lead to wrong verifier
+assumptions later on, like falsely predicting conditional jumps to be
+always taken, etc.
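+
+To make the contradiction concrete, the following is a minimal
+standalone C sketch (plain userspace code, not the kernel
+implementation; struct tnum here and the tnum_contains() helper are
+illustrative stand-ins that merely mirror the kernel's tnum semantics,
+where 'value' holds the known-one bits and 'mask' the unknown bits):
+
+  #include <stdio.h>
+  #include <stdint.h>
+
+  struct tnum { uint64_t value; uint64_t mask; };
+
+  /* A concrete u64 x is representable by a tnum iff all bits outside
+   * the unknown mask match the known 'value' bits. (Illustrative
+   * helper, not a kernel API.)
+   */
+  static int tnum_contains(struct tnum t, uint64_t x)
+  {
+          return (x & ~t.mask) == t.value;
+  }
+
+  int main(void)
+  {
+          /* var_off after the faulty jmp32 refinement: fixed bits 0x0,
+           * unknown bits only in 0x7f00000000, i.e. the low 32 bits
+           * are claimed to be known-zero.
+           */
+          struct tnum bad = { 0x0, 0x7f00000000ULL };
+
+          /* 0x2000000300 satisfies every probed bound: it lies within
+           * [0x2000000000,0x4000000000] in u64 space and its low 32
+           * bits (0x300) lie within [0x200,0x400] in u32 space.
+           */
+          uint64_t x = 0x2000000300ULL;
+
+          /* Prints "no": a value that must remain possible is excluded
+           * by the tracked state.
+           */
+          printf("%s\n", tnum_contains(bad, x) ? "yes" : "no");
+          return 0;
+  }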
+
+The issue here is that __reg_bound_offset32()'s implementation from
+commit 581738a681b6 ("bpf: Provide better register bounds after jmp32
+instructions") makes an incorrect range assumption:
+
+  static void __reg_bound_offset32(struct bpf_reg_state *reg)
+  {
+          u64 mask = 0xffffFFFF;
+          struct tnum range = tnum_range(reg->umin_value & mask,
+                                         reg->umax_value & mask);
+          struct tnum lo32 = tnum_cast(reg->var_off, 4);
+          struct tnum hi32 = tnum_lshift(tnum_rshift(reg->var_off, 32), 32);
+
+          reg->var_off = tnum_or(hi32, tnum_intersect(lo32, range));
+  }
+
+In the above walk-through example, __reg_bound_offset32() as-is chose a
+range of [0x0,0x0] after masking with 0xffffffff, since both
+umin:0x2000000000 and umax:0x4000000000 have all-zero low 32 bits, and
+therefore the lo32 part was clamped to 0x0 as well. However, given the
+umin:0x2000000000 and umax:0x4000000000 range above, the actual possible
+interval for u32 space is [0x0,0xffffffff] instead.
+
+In the case of the original reproducer, the situation looked as follows
+at insn 5 for r0:
+
+  [...]
+  5: R0_w=invP(id=0,umin_value=808464432,umax_value=5103431727,var_off=(0x0; 0x1ffffffff)) R1_w=invP808464432 R10=fp0
+                               0x30303030           0x13030302f
+  5: (de) if w1 s<= w0 goto pc+0
+  R0_w=invP(id=0,umin_value=808464432,umax_value=5103431727,var_off=(0x30303020; 0x10000001f)) R1_w=invP808464432 R10=fp0
+                               0x30303030           0x13030302f
+  [...]
+
+After the fall-through, we similarly forced the var_off result into the
+wrong range [0x30303030,0x3030302f], suggesting later on that the fixed
+bits must only be 0x30303020 with 0x10000001f unknowns, whereas such an
+assumption can only be made when both bounds in the hi32 range match.
+
+Originally, I was thinking to fix this by moving reg into a temp reg,
+using the proper coerce_reg_to_size() helper on the temp reg, and based
+on that defining the range tnum for the later intersection:
+
+  static void __reg_bound_offset32(struct bpf_reg_state *reg)
+  {
+          struct bpf_reg_state tmp = *reg;
+          struct tnum lo32, hi32, range;
+
+          coerce_reg_to_size(&tmp, 4);
+          range = tnum_range(tmp.umin_value, tmp.umax_value);
+          lo32 = tnum_cast(reg->var_off, 4);
+          hi32 = tnum_lshift(tnum_rshift(reg->var_off, 32), 32);
+          reg->var_off = tnum_or(hi32, tnum_intersect(lo32, range));
+  }
+
+In the case of the concrete example, this gives us a more conservative
+unknown section. Thus, after knowing r1 <= 0x4000000000 and
+r1 >= 0x2000000000 and w1 <= 0x400 and w1 >= 0x200:
+
+  max: 0b100000000000000000000000000000000000000 / 0x4000000000
+  var: 0b111111111111111111111111111111111111111 / 0x7fffffffff
+  min: 0b010000000000000000000000000000000000000 / 0x2000000000
+
+However, the new __reg_bound_offset32() above has no effect on refining
+the knowledge of the register contents. Meaning, if the bounds in the
+hi32 range mismatch, we get the identity function: the range reg spans
+[0x0,0xffffffff], and we cast var_off into lo32 only to later binary-or
+it again with the hi32.
+
+Likewise, if the bounds in the hi32 range match, then we mask both
+bounds with 0xffffffff and use the resulting umin/umax for the range to
+later intersect the lo32 with it. However, the _prior_ call to
+__reg_bound_offset() already did exactly such an intersection on the
+full reg, so we would only repeat the same operation on the lo32 part
+a second time.
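+
+The hi32-match condition can be made explicit with another small
+standalone sketch (again plain C rather than kernel code; lo32_bounds()
+is a hypothetical helper named here purely for illustration). The masked
+umin/umax may only serve as a u32 range when the high halves of both
+bounds agree; otherwise the low half wraps somewhere in [umin,umax] and
+nothing is known about it:
+
+  #include <stdio.h>
+  #include <stdint.h>
+
+  /* Derive sound u32 bounds from u64 bounds. (Illustrative helper,
+   * not a kernel API.)
+   */
+  static void lo32_bounds(uint64_t umin, uint64_t umax,
+                          uint32_t *min32, uint32_t *max32)
+  {
+          if ((umin >> 32) == (umax >> 32)) {
+                  /* High halves match: the low half really is bounded. */
+                  *min32 = (uint32_t)umin;
+                  *max32 = (uint32_t)umax;
+          } else {
+                  /* High halves differ: the low half passes through the
+                   * full u32 space, so only [0,0xffffffff] is sound.
+                   */
+                  *min32 = 0;
+                  *max32 = UINT32_MAX;
+          }
+  }
+
+  int main(void)
+  {
+          uint32_t lo, hi;
+
+          /* The walk-through case: masking umin=0x2000000000 and
+           * umax=0x4000000000 with 0xffffffff gave the bogus range
+           * [0x0,0x0]; the actual possible u32 interval is full.
+           */
+          lo32_bounds(0x2000000000ULL, 0x4000000000ULL, &lo, &hi);
+          printf("[0x%x, 0x%x]\n", lo, hi); /* [0x0, 0xffffffff] */
+          return 0;
+  }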
+
+Given this has no effect and the original commit had false assumptions,
+this patch reverts the code entirely, which is also more straightforward
+for stable trees: apparently 581738a681b6 got auto-selected by Sasha's
+ML system and misclassified as a fix, so it got sucked into v5.4 where
+it should never have landed. A revert is also low-risk from a user PoV,
+since it requires a recent kernel and llc to opt into the -mcpu=v3 BPF
+CPU in order to generate jmp32 instructions in the first place. A proper
+bounds refinement would need a significantly more complex approach which
+is currently being worked on, but it is not stable material [0]. Hence
+the revert is the best option for stable. After the revert, the
+originally reported program gets rejected as follows:
+
+  1: (7f) r0 >>= r0
+  2: (14) w0 -= 808464432
+  3: (07) r0 += 808464432
+  4: (b7) r1 = 808464432
+  5: (de) if w1 s<= w0 goto pc+0
+  R0_w=invP(id=0,umin_value=808464432,umax_value=5103431727,var_off=(0x0; 0x1ffffffff)) R1_w=invP808464432 R10=fp0
+  6: (07) r0 += -2144337872
+  7: (14) w0 -= -1607454672
+  8: (25) if r0 > 0x30303030 goto pc+0
+  R0_w=invP(id=0,umax_value=808464432,var_off=(0x0; 0x3fffffff)) R1_w=invP808464432 R10=fp0
+  9: (76) if w0 s>= 0x303030 goto pc+2
+  R0=invP(id=0,umax_value=3158063,var_off=(0x0; 0x3fffff)) R1=invP808464432 R10=fp0
+  10: (30) r0 = *(u8 *)skb[808464432]
+  BPF_LD_[ABS|IND] uses reserved fields
+  processed 11 insns (limit 1000000) [...]
+
+  [0] https://lore.kernel.org/bpf/158507130343.15666.8018068546764556975.stgit@john-Precision-5820-Tower/T/
+
+Fixes: 581738a681b6 ("bpf: Provide better register bounds after jmp32 instructions")
+Reported-by: Anatoly Trosinenko <anatoly.trosinenko@gmail.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Link: https://lore.kernel.org/bpf/20200330160324.15259-2-daniel@iogearbox.net
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/verifier.c | 19 -------------------
+ 1 file changed, 19 deletions(-)
+
+diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
+index 1cc945daa9c8..5080469094af 100644
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -1034,17 +1034,6 @@ static void __reg_bound_offset(struct bpf_reg_state *reg)
+ 					       reg->umax_value));
+ }
+ 
+-static void __reg_bound_offset32(struct bpf_reg_state *reg)
+-{
+-	u64 mask = 0xffffFFFF;
+-	struct tnum range = tnum_range(reg->umin_value & mask,
+-				       reg->umax_value & mask);
+-	struct tnum lo32 = tnum_cast(reg->var_off, 4);
+-	struct tnum hi32 = tnum_lshift(tnum_rshift(reg->var_off, 32), 32);
+-
+-	reg->var_off = tnum_or(hi32, tnum_intersect(lo32, range));
+-}
+-
+ /* Reset the min/max bounds of a register */
+ static void __mark_reg_unbounded(struct bpf_reg_state *reg)
+ {
+@@ -5717,10 +5706,6 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg,
+ 	/* We might have learned some bits from the bounds. */
+ 	__reg_bound_offset(false_reg);
+ 	__reg_bound_offset(true_reg);
+-	if (is_jmp32) {
+-		__reg_bound_offset32(false_reg);
+-		__reg_bound_offset32(true_reg);
+-	}
+ 	/* Intersecting with the old var_off might have improved our bounds
+ 	 * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
+ 	 * then new var_off is (0; 0x7f...fc) which improves our umax.
+@@ -5830,10 +5815,6 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
+ 	/* We might have learned some bits from the bounds. */
+ 	__reg_bound_offset(false_reg);
+ 	__reg_bound_offset(true_reg);
+-	if (is_jmp32) {
+-		__reg_bound_offset32(false_reg);
+-		__reg_bound_offset32(true_reg);
+-	}
+ 	/* Intersecting with the old var_off might have improved our bounds
+ 	 * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
+ 	 * then new var_off is (0; 0x7f...fc) which improves our umax.
+-- 
+cgit v1.2.3-1-gf6bb5