diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 5aecb4889415fe10b11a89495265ff934af4d7ea..058af73cf10c993416bd541d9ea5281dc3fc4995 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -22,6 +22,13 @@ ip_no_pmtu_disc - BOOLEAN min_pmtu - INTEGER default 552 - minimum discovered Path MTU +fwmark_reflect - BOOLEAN + Controls the fwmark of kernel-generated IPv4 reply packets that are not + associated with a socket for example, TCP RSTs or ICMP echo replies). + If unset, these packets have a fwmark of zero. If set, they have the + fwmark of the packet they are replying to. + Default: 0 + route/max_size - INTEGER Maximum number of routes allowed in the kernel. Increase this when using large numbers of interfaces and/or routes. @@ -1048,6 +1055,13 @@ proxy_ndp - INTEGER 2 NDP packets are sent to userspace, where a userspace proxy can be implemented +fwmark_reflect - BOOLEAN + Controls the fwmark of kernel-generated IPv6 reply packets that are not + associated with a socket for example, TCP RSTs or ICMPv6 echo replies). + If unset, these packets have a fwmark of zero. If set, they have the + fwmark of the packet they are replying to. + Default: 0 + conf/interface/*: Change special settings per interface. diff --git a/include/net/ip.h b/include/net/ip.h index b53d65f24f7be8497b60475952d5a66c579aee68..f6c3f02823886d7df3e3ee05380174e4a1773f13 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -237,6 +237,9 @@ extern void ipfrag_init(void); extern void ip_static_sysctl_init(void); +#define IP4_REPLY_MARK(net, mark) \ + ((net)->ipv4.sysctl_fwmark_reflect ? (mark) : 0) + static inline bool ip_is_fragment(const struct iphdr *iph) { return (iph->frag_off & htons(IP_MF | IP_OFFSET)) != 0; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 5381973f099b046ad14e9a9091fd2581ea5310ac..f3d9b54e81d4d3351d879640bddf715cb4e3b62a 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -110,6 +110,9 @@ struct frag_hdr { #define IP6_MF 0x0001 +#define IP6_REPLY_MARK(net, mark) \ + ((net)->ipv6.sysctl.fwmark_reflect ? (mark) : 0) + #include <net/sock.h> /* sysctls */ diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index bbd023a1c9b9affa8662f0b711ea1ff4203f5c9f..446db341c5b29e820fe853a58c976e90a4dbe084 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -56,6 +56,7 @@ struct netns_ipv4 { unsigned int sysctl_ping_group_range[2]; long sysctl_tcp_mem[3]; + int sysctl_fwmark_reflect; atomic_t rt_genid; atomic_t dev_addr_genid; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 81abfcb2eb4e7d8394dbf9ec5da60a361cbb34c3..20b76abcb15e82fc3f34c449e031f5fa55f11c6b 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -25,6 +25,7 @@ struct netns_sysctl_ipv6 { int ip6_rt_mtu_expires; int ip6_rt_min_advmss; int icmpv6_time; + int fwmark_reflect; }; struct netns_ipv6 { diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 2e109ff62ea38fc2b196c994420766987bcf5e8c..ea63a5767fcf0ff9b2ba78ade80c3cd5ca426e04 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -335,6 +335,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) struct sock *sk; struct inet_sock *inet; __be32 daddr; + u32 mark = IP4_REPLY_MARK(net, skb->mark); if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb)) return; @@ -347,6 +348,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) icmp_param->data.icmph.checksum = 0; inet->tos = ip_hdr(skb)->tos; + sk->sk_mark = mark; daddr = ipc.addr = ip_hdr(skb)->saddr; ipc.opt = NULL; ipc.tx_flags = 0; @@ -358,6 +360,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) memset(&fl4, 0, sizeof(fl4)); fl4.daddr = daddr; fl4.saddr = rt->rt_spec_dst; + fl4.flowi4_mark = mark; fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); fl4.flowi4_proto = IPPROTO_ICMP; security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); @@ -376,7 +379,7 @@ static struct rtable *icmp_route_lookup(struct net *net, struct flowi4 *fl4, struct sk_buff *skb_in, const struct iphdr *iph, - __be32 saddr, u8 tos, + __be32 saddr, u8 tos, u32 mark, int type, int code, struct icmp_bxm *param) { @@ -388,6 +391,7 @@ static struct rtable *icmp_route_lookup(struct net *net, fl4->daddr = (param->replyopts.opt.opt.srr ? param->replyopts.opt.opt.faddr : iph->saddr); fl4->saddr = saddr; + fl4->flowi4_mark = mark; fl4->flowi4_tos = RT_TOS(tos); fl4->flowi4_proto = IPPROTO_ICMP; fl4->fl4_icmp_type = type; @@ -485,6 +489,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) struct flowi4 fl4; __be32 saddr; u8 tos; + u32 mark; struct net *net; struct sock *sk; @@ -581,6 +586,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) | IPTOS_PREC_INTERNETCONTROL) : iph->tos; + mark = IP4_REPLY_MARK(net, skb_in->mark); if (ip_options_echo(&icmp_param.replyopts.opt.opt, skb_in)) goto out_unlock; @@ -597,11 +603,12 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) icmp_param.skb = skb_in; icmp_param.offset = skb_network_offset(skb_in); inet_sk(sk)->tos = tos; + sk->sk_mark = mark; ipc.addr = iph->saddr; ipc.opt = &icmp_param.replyopts.opt; ipc.tx_flags = 0; - rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, + rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, mark, type, code, &icmp_param); if (IS_ERR(rt)) goto out_unlock; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 4910176d24ed5d8b4a8a5430a3bc3e33d8c1ba91..859ac52913a38027a1755a3a53ba178fda352398 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1500,7 +1500,8 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, daddr = replyopts.opt.opt.faddr; } - flowi4_init_output(&fl4, arg->bound_dev_if, 0, + flowi4_init_output(&fl4, arg->bound_dev_if, + IP4_REPLY_MARK(sock_net(sk), skb->mark), RT_TOS(arg->tos), RT_SCOPE_UNIVERSE, sk->sk_protocol, ip_reply_arg_flowi_flags(arg), diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 060667a61cc8e6ca783a879d04ca5f50a0584702..5ba9b363c7c8df896e23a88da6c5ecb9fb64499b 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -787,6 +787,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = ipv4_tcp_mem, }, + { + .procname = "fwmark_reflect", + .data = &init_net.ipv4.sysctl_fwmark_reflect, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { } }; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index ba0c1479ba0f3d8112b45c323a798fc572619a0c..b595aa69d01699202f36111782c2abd3dfeb4efe 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -396,6 +396,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) int len; int hlimit; int err = 0; + u32 mark = IP6_REPLY_MARK(net, skb->mark); if ((u8 *)hdr < skb->head || (skb->network_header + sizeof(*hdr)) > skb->tail) @@ -461,6 +462,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) fl6.daddr = hdr->saddr; if (saddr) fl6.saddr = *saddr; + fl6.flowi6_mark = mark; fl6.flowi6_oif = iif; fl6.fl6_icmp_type = type; fl6.fl6_icmp_code = code; @@ -469,6 +471,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) sk = icmpv6_xmit_lock(net); if (sk == NULL) return; + sk->sk_mark = mark; np = inet6_sk(sk); if (!icmpv6_xrlim_allow(sk, type, &fl6)) @@ -543,6 +546,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) struct dst_entry *dst; int err = 0; int hlimit; + u32 mark = IP6_REPLY_MARK(net, skb->mark); saddr = &ipv6_hdr(skb)->daddr; @@ -559,11 +563,13 @@ static void icmpv6_echo_reply(struct sk_buff *skb) fl6.saddr = *saddr; fl6.flowi6_oif = skb->dev->ifindex; fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY; + fl6.flowi6_mark = mark; security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); if (sk == NULL) return; + sk->sk_mark = mark; np = inet6_sk(sk); if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 166a57c47d39cd4707bc48ad541afd23c34b12d1..1f872c332fc0041e33e430846299b57c7840cede 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -48,6 +48,13 @@ static ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "fwmark_reflect", + .data = &init_net.ipv6.sysctl.fwmark_reflect, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 8a8fa2dd149fc3d3ada0ed353c006b96b3bbc8b6..bc777e4a7ebcd72ff3e7821cfef105770a0a04c0 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -898,6 +898,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, fl6.flowi6_proto = IPPROTO_TCP; if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL) fl6.flowi6_oif = inet6_iif(skb); + fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark); fl6.fl6_dport = t1->dest; fl6.fl6_sport = t1->source; security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));