From 7364eb4064c68212c90f7c98ee62d29f2abf2499 Mon Sep 17 00:00:00 2001
From: Luis Gerhorst <gerhorst@cs.fau.de>
Date: Wed, 19 Jul 2023 18:26:48 +0200
Subject: [PATCH] [DRAFT] Separate spec_v1/v4 barrier instructions

---
 arch/arm/net/bpf_jit_32.c         |  3 +-
 arch/arm64/net/bpf_jit_comp.c     |  8 ++++--
 arch/loongarch/net/bpf_jit.c      |  3 +-
 arch/mips/net/bpf_jit_comp32.c    |  3 +-
 arch/mips/net/bpf_jit_comp64.c    |  3 +-
 arch/powerpc/net/bpf_jit_comp32.c |  3 +-
 arch/powerpc/net/bpf_jit_comp64.c |  3 +-
 arch/riscv/net/bpf_jit_comp32.c   |  3 +-
 arch/riscv/net/bpf_jit_comp64.c   |  3 +-
 arch/s390/net/bpf_jit_comp.c      |  3 +-
 arch/sparc/net/bpf_jit_comp_64.c  |  3 +-
 arch/x86/net/bpf_jit_comp.c       |  3 +-
 arch/x86/net/bpf_jit_comp32.c     |  5 ++--
 include/linux/bpf_verifier.h      |  5 ++--
 include/linux/filter.h            | 23 +++++++++++++---
 kernel/bpf/core.c                 | 23 +++++++++-------
 kernel/bpf/verifier.c             | 46 +++++++++++++++++++++----------
 17 files changed, 98 insertions(+), 45 deletions(-)

diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 6a1c9fca5260b..92cadfdc88ca0 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -1607,7 +1607,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 		emit_ldx_r(dst, rn, off, ctx, BPF_SIZE(code));
 		break;
 	/* speculation barrier */
-	case BPF_ST | BPF_NOSPEC:
+	case BPF_ST | BPF_NOSPEC_V1: /* TODO */
+	case BPF_ST | BPF_NOSPEC_V4:
 		break;
 	/* ST: *(size *)(dst + off) = imm */
 	case BPF_ST | BPF_MEM | BPF_W:
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index ec2174838f2af..9d573f949081b 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -1178,8 +1178,12 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 			return ret;
 		break;
 
-	/* speculation barrier */
-	case BPF_ST | BPF_NOSPEC:
+	case BPF_ST | BPF_NOSPEC_V1:
+		/* TODO */
+		break;
+
+	/* speculative store bypass barrier */
+	case BPF_ST | BPF_NOSPEC_V4:
 		/*
 		 * Nothing required here.
 		 *
diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index db9342b2d0e66..2a1b553738eea 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -1023,7 +1023,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
 		break;
 
 	/* Speculation barrier */
-	case BPF_ST | BPF_NOSPEC:
+	case BPF_ST | BPF_NOSPEC_V1: /* TODO */
+	case BPF_ST | BPF_NOSPEC_V4:
 		break;
 
 	default:
diff --git a/arch/mips/net/bpf_jit_comp32.c b/arch/mips/net/bpf_jit_comp32.c
index ace5db3fbd171..33c3fbb277f46 100644
--- a/arch/mips/net/bpf_jit_comp32.c
+++ b/arch/mips/net/bpf_jit_comp32.c
@@ -1684,7 +1684,8 @@ int build_insn(const struct bpf_insn *insn, struct jit_context *ctx)
 		emit_stx(ctx, lo(dst), src, off, BPF_SIZE(code));
 		break;
 	/* Speculation barrier */
-	case BPF_ST | BPF_NOSPEC:
+	case BPF_ST | BPF_NOSPEC_V1: /* TODO */
+	case BPF_ST | BPF_NOSPEC_V4:
 		break;
 	/* Atomics */
 	case BPF_STX | BPF_ATOMIC | BPF_W:
diff --git a/arch/mips/net/bpf_jit_comp64.c b/arch/mips/net/bpf_jit_comp64.c
index fa7e9aa37f498..c654e3d3b46b8 100644
--- a/arch/mips/net/bpf_jit_comp64.c
+++ b/arch/mips/net/bpf_jit_comp64.c
@@ -843,7 +843,8 @@ int build_insn(const struct bpf_insn *insn, struct jit_context *ctx)
 		emit_stx(ctx, dst, src, off, BPF_SIZE(code));
 		break;
 	/* Speculation barrier */
-	case BPF_ST | BPF_NOSPEC:
+	case BPF_ST | BPF_NOSPEC_V1: /* TODO */
+	case BPF_ST | BPF_NOSPEC_V4:
 		break;
 	/* Atomics */
 	case BPF_STX | BPF_ATOMIC | BPF_W:
diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
index 7f91ea064c087..d285c17eaa6f4 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -801,7 +801,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 		/*
 		 * BPF_ST NOSPEC (speculation barrier)
 		 */
-		case BPF_ST | BPF_NOSPEC:
+		case BPF_ST | BPF_NOSPEC_V1: /* TODO */
+		case BPF_ST | BPF_NOSPEC_V4:
 			break;
 
 		/*
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 0f8048f6dad63..aa32019239887 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -727,7 +727,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 		/*
 		 * BPF_ST NOSPEC (speculation barrier)
 		 */
-		case BPF_ST | BPF_NOSPEC:
+		case BPF_ST | BPF_NOSPEC_V1: /* TODO */
+		case BPF_ST | BPF_NOSPEC_V4:
 			if (!security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) ||
 					!security_ftr_enabled(SEC_FTR_STF_BARRIER))
 				break;
diff --git a/arch/riscv/net/bpf_jit_comp32.c b/arch/riscv/net/bpf_jit_comp32.c
index 529a83b85c1c9..daa093a583541 100644
--- a/arch/riscv/net/bpf_jit_comp32.c
+++ b/arch/riscv/net/bpf_jit_comp32.c
@@ -1250,7 +1250,8 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
 		break;
 
 	/* speculation barrier */
-	case BPF_ST | BPF_NOSPEC:
+	case BPF_ST | BPF_NOSPEC_V1: /* TODO */
+	case BPF_ST | BPF_NOSPEC_V4:
 		break;
 
 	case BPF_ST | BPF_MEM | BPF_B:
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index c648864c8cd1a..467d13249ce77 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -1558,7 +1558,8 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
 		break;
 	}
 	/* speculation barrier */
-	case BPF_ST | BPF_NOSPEC:
+	case BPF_ST | BPF_NOSPEC_V1: /* TODO */
+	case BPF_ST | BPF_NOSPEC_V4:
 		break;
 
 	/* ST: *(size *)(dst + off) = imm */
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 5e9371fbf3d5f..c6280b462b6d4 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -1244,7 +1244,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 	/*
 	 * BPF_NOSPEC (speculation barrier)
 	 */
-	case BPF_ST | BPF_NOSPEC:
+	case BPF_ST | BPF_NOSPEC_V1: /* TODO */
+	case BPF_ST | BPF_NOSPEC_V4:
 		break;
 	/*
 	 * BPF_ST(X)
diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
index fa0759bfe498e..2822249f678bc 100644
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -1288,7 +1288,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 		break;
 	}
 	/* speculation barrier */
-	case BPF_ST | BPF_NOSPEC:
+	case BPF_ST | BPF_NOSPEC_V1: /* TODO */
+	case BPF_ST | BPF_NOSPEC_V4:
 		break;
 	/* ST: *(size *)(dst + off) = imm */
 	case BPF_ST | BPF_MEM | BPF_W:
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 438adb695daab..f8abf2bb8ab10 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1319,7 +1319,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
 			break;
 
 			/* speculation barrier */
-		case BPF_ST | BPF_NOSPEC:
+		case BPF_ST | BPF_NOSPEC_V1:
+		case BPF_ST | BPF_NOSPEC_V4:
 			EMIT_LFENCE();
 			break;
 
diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
index 429a89c5468b5..1f6d3b5ca24ec 100644
--- a/arch/x86/net/bpf_jit_comp32.c
+++ b/arch/x86/net/bpf_jit_comp32.c
@@ -1902,8 +1902,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 			i++;
 			break;
 		}
-		/* speculation barrier */
-		case BPF_ST | BPF_NOSPEC:
+		/* speculation barriers */
+		case BPF_ST | BPF_NOSPEC_V1:
+		case BPF_ST | BPF_NOSPEC_V4:
 			if (boot_cpu_has(X86_FEATURE_XMM2))
 				/* Emit 'lfence' */
 				EMIT3(0x0F, 0xAE, 0xE8);
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 6492670c5a4c1..caa776d33af31 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -476,8 +476,9 @@ struct bpf_insn_aux_data {
 	u64 map_key_state; /* constant (32 bit) key tracking for maps */
 	int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
 	u32 seen; /* this insn was processed by the verifier at env->pass_cnt */
-	bool nospec; /* do not execute this insn speculatively, for Spectre v1 mitigation */
-	bool nospec_result; /* subject to Spectre v4 sanitation */
+	bool nospec_v1; /* do not execute this insn speculatively, for Spectre v1 mitigation */
+	bool nospec_v1_result; /* do not speculatively execute the insn following this one, for Spectre v1 mitigation */
+	bool nospec_v4_result; /* subject to Spectre v4 sanitation */
 	bool zext_dst; /* this insn zero extends dst reg */
 	bool storage_get_func_atomic; /* bpf_*_storage_get() with atomic memory alloc */
 	bool is_iter_next; /* bpf_iter_<type>_next() kfunc call */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index f69114083ec71..a383adab9e735 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -72,10 +72,15 @@ struct ctl_table_header;
 /* unused opcode to mark call to interpreter with arguments */
 #define BPF_CALL_ARGS	0xe0
 
+/* unused opcode to mark speculation barrier for mitigating
+ * speculative bounds-check bypass and type confusion
+ */
+#define BPF_NOSPEC_V1	0xd0
+
 /* unused opcode to mark speculation barrier for mitigating
  * Speculative Store Bypass
  */
-#define BPF_NOSPEC	0xc0
+#define BPF_NOSPEC_V4	0xc0
 
 /* As per nm, we expose JITed images as text (code) section for
  * kallsyms. That way, tools like perf can find it to match
@@ -393,11 +398,21 @@ static inline bool insn_is_zext(const struct bpf_insn *insn)
 		.off   = 0,					\
 		.imm   = 0 })
 
-/* Speculation barrier */
+/* Speculative path execution barrier */
+
+#define BPF_ST_NOSPEC_V1()					\
+	((struct bpf_insn) {					\
+		.code  = BPF_ST | BPF_NOSPEC_V1,		\
+		.dst_reg = 0,					\
+		.src_reg = 0,					\
+		.off   = 0,					\
+		.imm   = 0 })
+
+/* Speculative-store-bypass barrier */
 
-#define BPF_ST_NOSPEC()						\
+#define BPF_ST_NOSPEC_V4()					\
 	((struct bpf_insn) {					\
-		.code  = BPF_ST | BPF_NOSPEC,			\
+		.code  = BPF_ST | BPF_NOSPEC_V4,		\
 		.dst_reg = 0,					\
 		.src_reg = 0,					\
 		.off   = 0,					\
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index dc85240a01342..e07af6f23e778 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1661,7 +1661,8 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
 		/* Non-UAPI available opcodes. */
 		[BPF_JMP | BPF_CALL_ARGS] = &&JMP_CALL_ARGS,
 		[BPF_JMP | BPF_TAIL_CALL] = &&JMP_TAIL_CALL,
-		[BPF_ST  | BPF_NOSPEC] = &&ST_NOSPEC,
+		[BPF_ST  | BPF_NOSPEC_V1] = &&ST_NOSPEC_V1,
+		[BPF_ST  | BPF_NOSPEC_V4] = &&ST_NOSPEC_V4,
 		[BPF_LDX | BPF_PROBE_MEM | BPF_B] = &&LDX_PROBE_MEM_B,
 		[BPF_LDX | BPF_PROBE_MEM | BPF_H] = &&LDX_PROBE_MEM_H,
 		[BPF_LDX | BPF_PROBE_MEM | BPF_W] = &&LDX_PROBE_MEM_W,
@@ -1908,15 +1909,17 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
 	COND_JMP(s, JSLE, <=)
 #undef COND_JMP
 	/* ST, STX and LDX*/
-	ST_NOSPEC:
-		/* Speculation barrier for mitigating Speculative Store Bypass.
-		 * In case of arm64, we rely on the firmware mitigation as
-		 * controlled via the ssbd kernel parameter. Whenever the
-		 * mitigation is enabled, it works for all of the kernel code
-		 * with no need to provide any additional instructions here.
-		 * In case of x86, we use 'lfence' insn for mitigation. We
-		 * reuse preexisting logic from Spectre v1 mitigation that
-		 * happens to produce the required code on x86 for v4 as well.
+	ST_NOSPEC_V1:
+	ST_NOSPEC_V4:
+		/* Speculation barrier for mitigating Speculative Store Bypass,
+		 * Bounds-Check Bypass, and Type Confusion. In case of arm64, we
+		 * rely on the firmware mitigation as controlled via the ssbd
+		 * kernel parameter. Whenever the mitigation is enabled, it
+		 * works for all of the kernel code with no need to provide any
+		 * additional instructions here. In case of x86, we use 'lfence'
+		 * insn for mitigation. We reuse preexisting logic from Spectre
+		 * v1 mitigation that happens to produce the required code on
+		 * x86 for v4 as well.
 		 */
 		barrier_nospec();
 		CONT;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 8366dd81a0d4d..cb4b5de1ef797 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1849,7 +1849,7 @@ static int push_stack(struct bpf_verifier_env *env,
 		 * speculatively. */
 		/* TODO: Is the nospec always put into the basic block of the
 		 * current insn? */
-		env->insn_aux_data[env->insn_idx].nospec |= true;
+		env->insn_aux_data[env->insn_idx].nospec_v1 |= true;
 		/* TODO: Reduce complexity by not doing/undoing the following? */
 		env->head = env->head->next;
 		env->stack_size--;
@@ -1865,7 +1865,7 @@ static int push_stack(struct bpf_verifier_env *env,
 		if (elem->st.speculative) {
 			/* Prevent the destination instruction from executing
 			 * speculatively. */
-			env->insn_aux_data[elem->insn_idx].nospec |= true;
+			env->insn_aux_data[elem->insn_idx].nospec_v1 |= true;
 			env->head = env->head->next;
 			env->stack_size--;
 			kfree(elem);
@@ -2406,7 +2406,7 @@ static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env,
 	env->head = elem;
 	env->stack_size++;
 	if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
-		/* TODO: Fall back to nospec if specultive, see push_stack(). */
+		/* TODO: Fall back to nospec_v1 if speculative, see push_stack(). */
 		verbose(env,
 			"The sequence of %d jumps is too complex for async cb.\n",
 			env->stack_size);
@@ -4354,7 +4354,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
 		}
 
 		if (sanitize)
-			env->insn_aux_data[insn_idx].nospec_result = true;
+			env->insn_aux_data[insn_idx].nospec_v4_result = true;
 	}
 
 	err = destroy_if_dynptr_stack_slot(env, state, spi);
@@ -6687,7 +6687,7 @@ static int check_stack_range_initialized(
 		min_off = max_off = reg->var_off.value + off;
 	} else {
 		if (!env->bypass_spec_v1) {
-			env->insn_aux_data[env->insn_idx].nospec |= true;
+			env->insn_aux_data[env->insn_idx].nospec_v1 |= true;
 		}
 		/* Only initialized buffer on stack is allowed to be accessed
 		 * with variable offset. With uninitialized buffer it's hard to
@@ -11794,7 +11794,7 @@ static int sanitize_err(struct bpf_verifier_env *env,
 		return -EACCES;
 	}
 
-	env->insn_aux_data[env->insn_idx].nospec_result |= true;
+	env->insn_aux_data[env->insn_idx].nospec_v1_result |= true;
 	return 0;
 }
 
@@ -11848,11 +11848,11 @@ static int sanitize_check_bounds(struct bpf_verifier_env *env,
 	case PTR_TO_STACK:
 		if (check_stack_access_for_ptr_arithmetic(env, dst, dst_reg,
 					dst_reg->off + dst_reg->var_off.value))
-			env->insn_aux_data[env->insn_idx].nospec_result |= true;
+			env->insn_aux_data[env->insn_idx].nospec_v1_result |= true;
 		break;
 	case PTR_TO_MAP_VALUE:
 		if (check_map_access(env, dst, dst_reg->off, 1, false, ACCESS_HELPER)) {
-			env->insn_aux_data[env->insn_idx].nospec_result |= true;
+			env->insn_aux_data[env->insn_idx].nospec_v1_result |= true;
 		}
 		break;
 	default:
@@ -16517,7 +16517,7 @@ static int do_check(struct bpf_verifier_env *env)
 				return err;
 			} else if (err < 0) {
 				BUG_ON(!state->speculative);
-				env->insn_aux_data[env->insn_idx].nospec |= true;
+				env->insn_aux_data[env->insn_idx].nospec_v1 |= true;
 
 				err = process_bpf_exit(env, &prev_insn_idx, pop_log, &do_print_state);
 				if (err == CHECK_NEXT_INSN) {
@@ -16614,7 +16614,7 @@ static int do_check(struct bpf_verifier_env *env)
 		} else if (err && state->speculative) {
 			BUG_ON(err > 0);
 			/* Terminate this speculative path forcefully. */
-			env->insn_aux_data[env->insn_idx].nospec |= true;
+			env->insn_aux_data[env->insn_idx].nospec_v1 |= true;
 
 			err = process_bpf_exit(env, &prev_insn_idx, pop_log, &do_print_state);
 			if (err == CHECK_NEXT_INSN) {
@@ -17631,10 +17631,10 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
 			type_other = true;
 		}
 
-		if (env->insn_aux_data[i + delta].nospec_result) {
+		if (env->insn_aux_data[i + delta].nospec_v4_result) {
 			struct bpf_insn patch[] = {
 				*insn,
-				BPF_ST_NOSPEC(),
+				BPF_ST_NOSPEC_V4(),
 			};
 
 			cnt = ARRAY_SIZE(patch);
@@ -17649,12 +17649,30 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
 				continue;
 		}
 
-		if (env->insn_aux_data[i + delta].nospec) {
+		if (env->insn_aux_data[i + delta].nospec_v1_result) {
+			struct bpf_insn patch[] = {
+				*insn,
+				BPF_ST_NOSPEC_V1(),
+			};
+
+			cnt = ARRAY_SIZE(patch);
+			new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta    += cnt - 1;
+			env->prog = new_prog;
+			insn      = new_prog->insnsi + i + delta;
+			if (type == BPF_WRITE)
+				continue;
+		}
+
+		if (env->insn_aux_data[i + delta].nospec_v1) {
 			/* TODO: Check that previous instruction is not already
 			 * a nospec. Or even better, insert the lfence only at
 			 * the beginning of the basic block. */
 			struct bpf_insn patch[] = {
-				BPF_ST_NOSPEC(),
+				BPF_ST_NOSPEC_V1(),
 				*insn,
 			};
 
-- 
GitLab