From faec3cef2fac660e91ab001cdd35e9b1741fa07d Mon Sep 17 00:00:00 2001 From: Philippe Antoine Date: Thu, 24 Apr 2025 09:18:58 +0200 Subject: [PATCH 01/11] decode: use PacketIsTunnelChild Instead of directly accessing the field Will allow PacketTunnelType to hold the precise tunnel type like DECODE_TUNNEL_ERSPANII with a modification of PacketIsTunnelChild --- src/decode.c | 4 ++-- src/log-pcap.c | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/decode.c b/src/decode.c index 9ba91f23dcdb..59ddb4d807ef 100644 --- a/src/decode.c +++ b/src/decode.c @@ -422,7 +422,7 @@ Packet *PacketTunnelPktSetup(ThreadVars *tv, DecodeThreadVars *dtv, Packet *pare /* set the root ptr to the lowest layer */ if (parent->root != NULL) { p->root = parent->root; - BUG_ON(parent->ttype != PacketTunnelChild); + BUG_ON(!PacketIsTunnelChild(parent)); } else { p->root = parent; parent->ttype = PacketTunnelRoot; @@ -483,7 +483,7 @@ Packet *PacketDefragPktSetup(Packet *parent, const uint8_t *pkt, uint32_t len, u /* set the root ptr to the lowest layer */ if (parent->root != NULL) { p->root = parent->root; - BUG_ON(parent->ttype != PacketTunnelChild); + BUG_ON(!PacketIsTunnelChild(parent)); } else { p->root = parent; // we set parent->ttype later diff --git a/src/log-pcap.c b/src/log-pcap.c index c46921b2823a..269c52258876 100644 --- a/src/log-pcap.c +++ b/src/log-pcap.c @@ -258,7 +258,7 @@ static bool PcapLogCondition(ThreadVars *tv, void *thread_data, const Packet *p) return false; } - if (p->ttype == PacketTunnelChild) { + if (PacketIsTunnelChild(p)) { return false; } return true; @@ -390,7 +390,7 @@ static int PcapLogOpenHandles(PcapLogData *pl, const Packet *p) PCAPLOG_PROFILE_START; int datalink = p->datalink; - if (p->ttype == PacketTunnelChild) { + if (PacketIsTunnelChild(p)) { Packet *real_p = p->root; datalink = real_p->datalink; } @@ -626,7 +626,7 @@ static int PcapLog(ThreadVars *tv, void *thread_data, const Packet *p) pl->pkt_cnt++; pl->h->ts.tv_sec = 
SCTIME_SECS(p->ts); pl->h->ts.tv_usec = SCTIME_USECS(p->ts); - if (p->ttype == PacketTunnelChild) { + if (PacketIsTunnelChild(p)) { rp = p->root; pl->h->caplen = GET_PKT_LEN(rp); pl->h->len = GET_PKT_LEN(rp); @@ -700,7 +700,7 @@ static int PcapLog(ThreadVars *tv, void *thread_data, const Packet *p) /* PcapLogDumpSegment has written over the PcapLogData variables so need to update */ pl->h->ts.tv_sec = SCTIME_SECS(p->ts); pl->h->ts.tv_usec = SCTIME_USECS(p->ts); - if (p->ttype == PacketTunnelChild) { + if (PacketIsTunnelChild(p)) { rp = p->root; pl->h->caplen = GET_PKT_LEN(rp); pl->h->len = GET_PKT_LEN(rp); @@ -713,7 +713,7 @@ static int PcapLog(ThreadVars *tv, void *thread_data, const Packet *p) } } - if (p->ttype == PacketTunnelChild) { + if (PacketIsTunnelChild(p)) { rp = p->root; ret = PcapWrite(tv, td, GET_PKT_DATA(rp), len); } else { From 4a84481562c0fc32b1fc8133be27f256d1897c5d Mon Sep 17 00:00:00 2001 From: Philippe Antoine Date: Thu, 24 Apr 2025 09:25:03 +0200 Subject: [PATCH 02/11] decode: merge DecodeTunnelProto into PacketTunnelType So that we know for a packet which precise type of tunnel it is (like erspan2). 
--- src/decode.c | 8 ++++---- src/decode.h | 29 +++++++++++++---------------- 2 files changed, 17 insertions(+), 20 deletions(-) diff --git a/src/decode.c b/src/decode.c index 59ddb4d807ef..33433f931031 100644 --- a/src/decode.c +++ b/src/decode.c @@ -179,10 +179,10 @@ void PacketAlertFree(PacketAlert *pa_array) } static int DecodeTunnel(ThreadVars *, DecodeThreadVars *, Packet *, const uint8_t *, uint32_t, - enum DecodeTunnelProto) WARN_UNUSED; + enum PacketTunnelType) WARN_UNUSED; static int DecodeTunnel(ThreadVars *tv, DecodeThreadVars *dtv, Packet *p, const uint8_t *pkt, - uint32_t len, enum DecodeTunnelProto proto) + uint32_t len, enum PacketTunnelType proto) { switch (proto) { case DECODE_TUNNEL_PPP: @@ -391,7 +391,7 @@ inline int PacketCopyData(Packet *p, const uint8_t *pktdata, uint32_t pktlen) * \retval p the pseudo packet or NULL if out of memory */ Packet *PacketTunnelPktSetup(ThreadVars *tv, DecodeThreadVars *dtv, Packet *parent, - const uint8_t *pkt, uint32_t len, enum DecodeTunnelProto proto) + const uint8_t *pkt, uint32_t len, enum PacketTunnelType proto) { int ret; @@ -428,7 +428,7 @@ Packet *PacketTunnelPktSetup(ThreadVars *tv, DecodeThreadVars *dtv, Packet *pare parent->ttype = PacketTunnelRoot; } /* tell new packet it's part of a tunnel */ - p->ttype = PacketTunnelChild; + p->ttype = proto; ret = DecodeTunnel(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), proto); diff --git a/src/decode.h b/src/decode.h index 62c699d7b2d5..b10fcec162fa 100644 --- a/src/decode.h +++ b/src/decode.h @@ -406,6 +406,17 @@ enum PacketTunnelType { PacketTunnelNone, PacketTunnelRoot, PacketTunnelChild, + DECODE_TUNNEL_ETHERNET, + DECODE_TUNNEL_ERSPANII, + DECODE_TUNNEL_ERSPANI, + DECODE_TUNNEL_VLAN, + DECODE_TUNNEL_IPV4, + DECODE_TUNNEL_IPV6, + DECODE_TUNNEL_IPV6_TEREDO, /**< separate protocol for stricter error handling */ + DECODE_TUNNEL_PPP, + DECODE_TUNNEL_NSH, + DECODE_TUNNEL_ARP, + DECODE_TUNNEL_UNSET }; /* forward declaration since Packet struct definition requires 
this */ @@ -1098,22 +1109,8 @@ static inline void PacketTunnelSetVerdicted(Packet *p) p->tunnel_verdicted = true; } -enum DecodeTunnelProto { - DECODE_TUNNEL_ETHERNET, - DECODE_TUNNEL_ERSPANII, - DECODE_TUNNEL_ERSPANI, - DECODE_TUNNEL_VLAN, - DECODE_TUNNEL_IPV4, - DECODE_TUNNEL_IPV6, - DECODE_TUNNEL_IPV6_TEREDO, /**< separate protocol for stricter error handling */ - DECODE_TUNNEL_PPP, - DECODE_TUNNEL_NSH, - DECODE_TUNNEL_ARP, - DECODE_TUNNEL_UNSET -}; - Packet *PacketTunnelPktSetup(ThreadVars *tv, DecodeThreadVars *dtv, Packet *parent, - const uint8_t *pkt, uint32_t len, enum DecodeTunnelProto proto); + const uint8_t *pkt, uint32_t len, enum PacketTunnelType proto); Packet *PacketDefragPktSetup(Packet *parent, const uint8_t *pkt, uint32_t len, uint8_t proto); void PacketDefragPktSetupParent(Packet *parent); void DecodeRegisterPerfCounters(DecodeThreadVars *, ThreadVars *); @@ -1362,7 +1359,7 @@ static inline bool PacketIsTunnelRoot(const Packet *p) static inline bool PacketIsTunnelChild(const Packet *p) { - return (p->ttype == PacketTunnelChild); + return (p->ttype > PacketTunnelRoot); } static inline bool PacketIsTunnel(const Packet *p) From 2ad49aef3794f1bf19f88f83a654bba954f3f8c4 Mon Sep 17 00:00:00 2001 From: Philippe Antoine Date: Fri, 2 May 2025 11:36:34 +0200 Subject: [PATCH 03/11] ebpf: check maps compatibility ebpf program does not handle 3 layers of vlan --- src/source-af-packet.c | 8 -------- src/util-ebpf.c | 14 ++++++++++++-- src/util-ebpf.h | 2 -- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/source-af-packet.c b/src/source-af-packet.c index e5c2c23fc4c1..6e636b388f0d 100644 --- a/src/source-af-packet.c +++ b/src/source-af-packet.c @@ -2282,7 +2282,6 @@ static int AFPBypassCallback(Packet *p) keys[0]->port16[1] = p->dp; keys[0]->vlan0 = p->vlan_id[0]; keys[0]->vlan1 = p->vlan_id[1]; - keys[0]->vlan2 = p->vlan_id[2]; if (p->proto == IPPROTO_TCP) { keys[0]->ip_proto = 1; @@ -2308,7 +2307,6 @@ static int AFPBypassCallback(Packet 
*p) keys[1]->port16[1] = p->sp; keys[1]->vlan0 = p->vlan_id[0]; keys[1]->vlan1 = p->vlan_id[1]; - keys[1]->vlan2 = p->vlan_id[2]; keys[1]->ip_proto = keys[0]->ip_proto; if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, keys[1], @@ -2343,7 +2341,6 @@ static int AFPBypassCallback(Packet *p) keys[0]->port16[1] = p->dp; keys[0]->vlan0 = p->vlan_id[0]; keys[0]->vlan1 = p->vlan_id[1]; - keys[0]->vlan2 = p->vlan_id[2]; if (p->proto == IPPROTO_TCP) { keys[0]->ip_proto = 1; @@ -2371,7 +2368,6 @@ static int AFPBypassCallback(Packet *p) keys[1]->port16[1] = p->sp; keys[1]->vlan0 = p->vlan_id[0]; keys[1]->vlan1 = p->vlan_id[1]; - keys[1]->vlan2 = p->vlan_id[2]; keys[1]->ip_proto = keys[0]->ip_proto; if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, keys[1], @@ -2439,7 +2435,6 @@ static int AFPXDPBypassCallback(Packet *p) keys[0]->port16[1] = htons(p->dp); keys[0]->vlan0 = p->vlan_id[0]; keys[0]->vlan1 = p->vlan_id[1]; - keys[0]->vlan2 = p->vlan_id[2]; if (p->proto == IPPROTO_TCP) { keys[0]->ip_proto = 1; } else { @@ -2464,7 +2459,6 @@ static int AFPXDPBypassCallback(Packet *p) keys[1]->port16[1] = htons(p->sp); keys[1]->vlan0 = p->vlan_id[0]; keys[1]->vlan1 = p->vlan_id[1]; - keys[1]->vlan2 = p->vlan_id[2]; keys[1]->ip_proto = keys[0]->ip_proto; if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, keys[1], p->afp_v.nr_cpus) == 0) { @@ -2497,7 +2491,6 @@ static int AFPXDPBypassCallback(Packet *p) keys[0]->port16[1] = htons(p->dp); keys[0]->vlan0 = p->vlan_id[0]; keys[0]->vlan1 = p->vlan_id[1]; - keys[0]->vlan2 = p->vlan_id[2]; if (p->proto == IPPROTO_TCP) { keys[0]->ip_proto = 1; } else { @@ -2524,7 +2517,6 @@ static int AFPXDPBypassCallback(Packet *p) keys[1]->port16[1] = htons(p->sp); keys[1]->vlan0 = p->vlan_id[0]; keys[1]->vlan1 = p->vlan_id[1]; - keys[1]->vlan2 = p->vlan_id[2]; keys[1]->ip_proto = keys[0]->ip_proto; if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, keys[1], p->afp_v.nr_cpus) == 0) { diff --git a/src/util-ebpf.c b/src/util-ebpf.c index e6f5e7fe915b..1d2d44681f5f 100644 --- a/src/util-ebpf.c 
+++ b/src/util-ebpf.c @@ -413,6 +413,18 @@ int EBPFLoadFile(const char *iface, const char *path, const char * section, SCLogError("Too many BPF maps in eBPF files"); break; } + if (strcmp(bpf_map__name(map), "flow_table_v4") == 0) { + if (bpf_map__key_size(map) != sizeof(struct flowv4_keys)) { + SCLogError("Incompatible flow_table_v4"); + break; + } + } + if (strcmp(bpf_map__name(map), "flow_table_v6") == 0) { + if (bpf_map__key_size(map) != sizeof(struct flowv6_keys)) { + SCLogError("Incompatible flow_table_v6"); + break; + } + } SCLogDebug("Got a map '%s' with fd '%d'", bpf_map__name(map), bpf_map__fd(map)); bpf_map_data->array[bpf_map_data->last].fd = bpf_map__fd(map); bpf_map_data->array[bpf_map_data->last].name = SCStrdup(bpf_map__name(map)); @@ -749,7 +761,6 @@ static int EBPFForEachFlowV4Table(ThreadVars *th_v, LiveDevice *dev, const char flow_key.dst.addr_data32[3] = 0; flow_key.vlan_id[0] = next_key.vlan0; flow_key.vlan_id[1] = next_key.vlan1; - flow_key.vlan_id[2] = next_key.vlan2; if (next_key.ip_proto == 1) { flow_key.proto = IPPROTO_TCP; } else { @@ -868,7 +879,6 @@ static int EBPFForEachFlowV6Table(ThreadVars *th_v, } flow_key.vlan_id[0] = next_key.vlan0; flow_key.vlan_id[1] = next_key.vlan1; - flow_key.vlan_id[2] = next_key.vlan2; if (next_key.ip_proto == 1) { flow_key.proto = IPPROTO_TCP; } else { diff --git a/src/util-ebpf.h b/src/util-ebpf.h index fe31f5912034..68c7b86959ed 100644 --- a/src/util-ebpf.h +++ b/src/util-ebpf.h @@ -45,7 +45,6 @@ struct flowv4_keys { __u8 ip_proto:1; __u16 vlan0:15; __u16 vlan1; - __u16 vlan2; }; struct flowv6_keys { @@ -58,7 +57,6 @@ struct flowv6_keys { __u8 ip_proto:1; __u16 vlan0:15; __u16 vlan1; - __u16 vlan2; }; struct pair { From 54e360a3b365b7dcbc0ba412e03b56324b009325 Mon Sep 17 00:00:00 2001 From: Philippe Antoine Date: Thu, 24 Apr 2025 10:56:23 +0200 Subject: [PATCH 04/11] decode/vxlan: treat as its own tunnel Ticket: 7717 Allows for instance to process/log ARP packets over VXLAN. 
That means we need to decode the ethernet layer above vxlan instead of skipping it as part of the vxlan, even if the vxlan decoder still checks the ethernet layer to avoid FPs. --- src/decode-vxlan.c | 15 ++++++++------- src/decode.c | 2 ++ src/decode.h | 1 + 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/decode-vxlan.c b/src/decode-vxlan.c index 92433a0fcded..b3096f4edf71 100644 --- a/src/decode-vxlan.c +++ b/src/decode-vxlan.c @@ -147,7 +147,7 @@ int DecodeVXLAN(ThreadVars *tv, DecodeThreadVars *dtv, Packet *p, StatsIncr(tv, dtv->counter_vxlan); EthernetHdr *ethh = (EthernetHdr *)(pkt + VXLAN_HEADER_LEN); - int decode_tunnel_proto = DECODE_TUNNEL_UNSET; + bool eth_ok = false; /* Look at encapsulated Ethernet frame to get next protocol */ uint16_t eth_type = SCNtohs(ethh->eth_type); @@ -156,20 +156,21 @@ int DecodeVXLAN(ThreadVars *tv, DecodeThreadVars *dtv, Packet *p, switch (eth_type) { case ETHERNET_TYPE_ARP: SCLogDebug("VXLAN found ARP"); + eth_ok = true; break; case ETHERNET_TYPE_IP: SCLogDebug("VXLAN found IPv4"); - decode_tunnel_proto = DECODE_TUNNEL_IPV4; + eth_ok = true; break; case ETHERNET_TYPE_IPV6: SCLogDebug("VXLAN found IPv6"); - decode_tunnel_proto = DECODE_TUNNEL_IPV6; + eth_ok = true; break; case ETHERNET_TYPE_VLAN: case ETHERNET_TYPE_8021AD: case ETHERNET_TYPE_8021QINQ: SCLogDebug("VXLAN found VLAN"); - decode_tunnel_proto = DECODE_TUNNEL_VLAN; + eth_ok = true; break; default: SCLogDebug("VXLAN found unsupported Ethertype - expected IPv4, IPv6, VLAN, or ARP"); @@ -177,9 +178,9 @@ int DecodeVXLAN(ThreadVars *tv, DecodeThreadVars *dtv, Packet *p, } /* Set-up and process inner packet if it is a supported ethertype */ - if (decode_tunnel_proto != DECODE_TUNNEL_UNSET) { - Packet *tp = PacketTunnelPktSetup(tv, dtv, p, pkt + VXLAN_HEADER_LEN + ETHERNET_HEADER_LEN, - len - (VXLAN_HEADER_LEN + ETHERNET_HEADER_LEN), decode_tunnel_proto); + if (eth_ok) { + Packet *tp = PacketTunnelPktSetup( + tv, dtv, p, pkt + VXLAN_HEADER_LEN, len - 
VXLAN_HEADER_LEN, DECODE_TUNNEL_VXLAN); if (tp != NULL) { PKT_SET_SRC(tp, PKT_SRC_DECODER_VXLAN); PacketEnqueueNoLock(&tv->decode_pq, tp); diff --git a/src/decode.c b/src/decode.c index 33433f931031..76bd0a76660c 100644 --- a/src/decode.c +++ b/src/decode.c @@ -202,6 +202,8 @@ static int DecodeTunnel(ThreadVars *tv, DecodeThreadVars *dtv, Packet *p, const return DecodeERSPAN(tv, dtv, p, pkt, len); case DECODE_TUNNEL_ERSPANI: return DecodeERSPANTypeI(tv, dtv, p, pkt, len); + case DECODE_TUNNEL_VXLAN: + return DecodeEthernet(tv, dtv, p, pkt, len); case DECODE_TUNNEL_NSH: return DecodeNSH(tv, dtv, p, pkt, len); case DECODE_TUNNEL_ARP: diff --git a/src/decode.h b/src/decode.h index b10fcec162fa..f6ff645a0242 100644 --- a/src/decode.h +++ b/src/decode.h @@ -409,6 +409,7 @@ enum PacketTunnelType { DECODE_TUNNEL_ETHERNET, DECODE_TUNNEL_ERSPANII, DECODE_TUNNEL_ERSPANI, + DECODE_TUNNEL_VXLAN, DECODE_TUNNEL_VLAN, DECODE_TUNNEL_IPV4, DECODE_TUNNEL_IPV6, From 5fba6a8977ff751d0c28a3ca2d0b1b0c5fab69a4 Mon Sep 17 00:00:00 2001 From: Philippe Antoine Date: Fri, 23 May 2025 08:58:12 +0200 Subject: [PATCH 05/11] flow: factorize duplicated code for hashing --- src/flow-hash.c | 107 +++++++++++++++++++++--------------------------- 1 file changed, 46 insertions(+), 61 deletions(-) diff --git a/src/flow-hash.c b/src/flow-hash.c index 632e5668b774..82de959c1887 100644 --- a/src/flow-hash.c +++ b/src/flow-hash.c @@ -114,6 +114,35 @@ typedef struct FlowHashKey6_ { }; } FlowHashKey6; +static inline void FlowHashIp4Fill(FlowHashKey4 *fhk, const Packet *p) +{ + fhk->proto = p->proto; + /* g_recurlvl_mask sets the recursion_level to 0 if + * decoder.recursion-level.use-for-tracking is disabled. + */ + fhk->recur = (uint8_t)p->recursion_level & g_recurlvl_mask; + /* g_livedev_mask sets the livedev ids to 0 if livedev.use-for-tracking + * is disabled. */ + uint16_t devid = p->livedev ? 
p->livedev->id : 0; + fhk->livedev = devid & g_livedev_mask; + /* g_vlan_mask sets the vlan_ids to 0 if vlan.use-for-tracking + * is disabled. */ + fhk->vlan_id[0] = p->vlan_id[0] & g_vlan_mask; + fhk->vlan_id[1] = p->vlan_id[1] & g_vlan_mask; + fhk->vlan_id[2] = p->vlan_id[2] & g_vlan_mask; +} + +static inline void FlowHashIp6Fill(FlowHashKey6 *fhk, const Packet *p) +{ + fhk->proto = p->proto; + fhk->recur = (uint8_t)p->recursion_level & g_recurlvl_mask; + uint16_t devid = p->livedev ? p->livedev->id : 0; + fhk->livedev = devid & g_livedev_mask; + fhk->vlan_id[0] = p->vlan_id[0] & g_vlan_mask; + fhk->vlan_id[1] = p->vlan_id[1] & g_vlan_mask; + fhk->vlan_id[2] = p->vlan_id[2] & g_vlan_mask; +} + uint32_t FlowGetIpPairProtoHash(const Packet *p) { uint32_t hash = 0; @@ -129,16 +158,7 @@ uint32_t FlowGetIpPairProtoHash(const Packet *p) fhk.ports[0] = 0xfedc; fhk.ports[1] = 0xba98; - fhk.proto = (uint8_t)p->proto; - /* g_recurlvl_mask sets the recursion_level to 0 if - * decoder.recursion-level.use-for-tracking is disabled. - */ - fhk.recur = (uint8_t)p->recursion_level & g_recurlvl_mask; - /* g_vlan_mask sets the vlan_ids to 0 if vlan.use-for-tracking - * is disabled. 
*/ - fhk.vlan_id[0] = p->vlan_id[0] & g_vlan_mask; - fhk.vlan_id[1] = p->vlan_id[1] & g_vlan_mask; - fhk.vlan_id[2] = p->vlan_id[2] & g_vlan_mask; + FlowHashIp4Fill(&fhk, p); hash = hashword(fhk.u32, ARRAY_SIZE(fhk.u32), flow_config.hash_rand); } else if (PacketIsIPv6(p)) { @@ -167,11 +187,8 @@ uint32_t FlowGetIpPairProtoHash(const Packet *p) fhk.ports[0] = 0xfedc; fhk.ports[1] = 0xba98; - fhk.proto = (uint8_t)p->proto; - fhk.recur = (uint8_t)p->recursion_level & g_recurlvl_mask; - fhk.vlan_id[0] = p->vlan_id[0] & g_vlan_mask; - fhk.vlan_id[1] = p->vlan_id[1] & g_vlan_mask; - fhk.vlan_id[2] = p->vlan_id[2] & g_vlan_mask; + + FlowHashIp6Fill(&fhk, p); hash = hashword(fhk.u32, ARRAY_SIZE(fhk.u32), flow_config.hash_rand); } @@ -207,20 +224,7 @@ static inline uint32_t FlowGetHash(const Packet *p) fhk.ports[1-pi] = p->sp; fhk.ports[pi] = p->dp; - fhk.proto = p->proto; - /* g_recurlvl_mask sets the recursion_level to 0 if - * decoder.recursion-level.use-for-tracking is disabled. - */ - fhk.recur = p->recursion_level & g_recurlvl_mask; - /* g_livedev_mask sets the livedev ids to 0 if livedev.use-for-tracking - * is disabled. */ - uint16_t devid = p->livedev ? p->livedev->id : 0; - fhk.livedev = devid & g_livedev_mask; - /* g_vlan_mask sets the vlan_ids to 0 if vlan.use-for-tracking - * is disabled. */ - fhk.vlan_id[0] = p->vlan_id[0] & g_vlan_mask; - fhk.vlan_id[1] = p->vlan_id[1] & g_vlan_mask; - fhk.vlan_id[2] = p->vlan_id[2] & g_vlan_mask; + FlowHashIp4Fill(&fhk, p); hash = hashword(fhk.u32, ARRAY_SIZE(fhk.u32), flow_config.hash_rand); @@ -237,13 +241,8 @@ static inline uint32_t FlowGetHash(const Packet *p) fhk.ports[1 - pi] = p->l4.vars.icmpv4.emb_sport; fhk.ports[pi] = p->l4.vars.icmpv4.emb_dport; + FlowHashIp4Fill(&fhk, p); fhk.proto = ICMPV4_GET_EMB_PROTO(p); - fhk.recur = p->recursion_level & g_recurlvl_mask; - uint16_t devid = p->livedev ? 
p->livedev->id : 0; - fhk.livedev = devid & g_livedev_mask; - fhk.vlan_id[0] = p->vlan_id[0] & g_vlan_mask; - fhk.vlan_id[1] = p->vlan_id[1] & g_vlan_mask; - fhk.vlan_id[2] = p->vlan_id[2] & g_vlan_mask; hash = hashword(fhk.u32, ARRAY_SIZE(fhk.u32), flow_config.hash_rand); @@ -254,13 +253,7 @@ static inline uint32_t FlowGetHash(const Packet *p) fhk.addrs[ai] = p->dst.addr_data32[0]; fhk.ports[0] = 0xfeed; fhk.ports[1] = 0xbeef; - fhk.proto = p->proto; - fhk.recur = p->recursion_level & g_recurlvl_mask; - uint16_t devid = p->livedev ? p->livedev->id : 0; - fhk.livedev = devid & g_livedev_mask; - fhk.vlan_id[0] = p->vlan_id[0] & g_vlan_mask; - fhk.vlan_id[1] = p->vlan_id[1] & g_vlan_mask; - fhk.vlan_id[2] = p->vlan_id[2] & g_vlan_mask; + FlowHashIp4Fill(&fhk, p); hash = hashword(fhk.u32, ARRAY_SIZE(fhk.u32), flow_config.hash_rand); } @@ -289,13 +282,7 @@ static inline uint32_t FlowGetHash(const Packet *p) const int pi = (p->sp > p->dp); fhk.ports[1-pi] = p->sp; fhk.ports[pi] = p->dp; - fhk.proto = p->proto; - fhk.recur = p->recursion_level & g_recurlvl_mask; - uint16_t devid = p->livedev ? p->livedev->id : 0; - fhk.livedev = devid & g_livedev_mask; - fhk.vlan_id[0] = p->vlan_id[0] & g_vlan_mask; - fhk.vlan_id[1] = p->vlan_id[1] & g_vlan_mask; - fhk.vlan_id[2] = p->vlan_id[2] & g_vlan_mask; + FlowHashIp6Fill(&fhk, p); hash = hashword(fhk.u32, ARRAY_SIZE(fhk.u32), flow_config.hash_rand); } @@ -409,6 +396,11 @@ static inline bool CmpLiveDevIds(const LiveDevice *livedev, const uint16_t id) return (((devid ^ id) & g_livedev_mask) == 0); } +#define CmpFlowMisc(x, y) \ + (((x)->proto == (y)->proto) && \ + ((x)->recursion_level == (y)->recursion_level || g_recurlvl_mask == 0) && \ + CmpVlanIds((x)->vlan_id, (y)->vlan_id)) + /* Since two or more flows can have the same hash key, we need to compare * the flow with the current packet or flow key. 
*/ static inline bool CmpFlowPacket(const Flow *f, const Packet *p) @@ -418,9 +410,7 @@ static inline bool CmpFlowPacket(const Flow *f, const Packet *p) const uint32_t *p_src = p->src.address.address_un_data32; const uint32_t *p_dst = p->dst.address.address_un_data32; return CmpAddrsAndPorts(f_src, f_dst, f->sp, f->dp, p_src, p_dst, p->sp, p->dp) && - f->proto == p->proto && - (f->recursion_level == p->recursion_level || g_recurlvl_mask == 0) && - CmpVlanIds(f->vlan_id, p->vlan_id) && (f->livedev == p->livedev || g_livedev_mask == 0); + CmpFlowMisc(f, p) && (f->livedev == p->livedev || g_livedev_mask == 0); } static inline bool CmpFlowKey(const Flow *f, const FlowKey *k) @@ -430,9 +420,7 @@ static inline bool CmpFlowKey(const Flow *f, const FlowKey *k) const uint32_t *k_src = k->src.address.address_un_data32; const uint32_t *k_dst = k->dst.address.address_un_data32; return CmpAddrsAndPorts(f_src, f_dst, f->sp, f->dp, k_src, k_dst, k->sp, k->dp) && - f->proto == k->proto && - (f->recursion_level == k->recursion_level || g_recurlvl_mask == 0) && - CmpVlanIds(f->vlan_id, k->vlan_id) && CmpLiveDevIds(f->livedev, k->livedev_id); + CmpFlowMisc(f, k) && CmpLiveDevIds(f->livedev, k->livedev_id); } static inline bool CmpAddrsAndICMPTypes(const uint32_t src1[4], @@ -457,9 +445,7 @@ static inline bool CmpFlowICMPPacket(const Flow *f, const Packet *p) const uint32_t *p_dst = p->dst.address.address_un_data32; return CmpAddrsAndICMPTypes(f_src, f_dst, f->icmp_s.type, f->icmp_d.type, p_src, p_dst, p->icmp_s.type, p->icmp_d.type) && - f->proto == p->proto && - (f->recursion_level == p->recursion_level || g_recurlvl_mask == 0) && - CmpVlanIds(f->vlan_id, p->vlan_id) && (f->livedev == p->livedev || g_livedev_mask == 0); + CmpFlowMisc(f, p) && (f->livedev == p->livedev || g_livedev_mask == 0); } /** @@ -524,9 +510,8 @@ static inline int FlowCompareESP(Flow *f, const Packet *p) const uint32_t *p_src = p->src.address.address_un_data32; const uint32_t *p_dst = 
p->dst.address.address_un_data32; - return CmpAddrs(f_src, p_src) && CmpAddrs(f_dst, p_dst) && f->proto == p->proto && - (f->recursion_level == p->recursion_level || g_recurlvl_mask == 0) && - CmpVlanIds(f->vlan_id, p->vlan_id) && f->esp.spi == ESP_GET_SPI(PacketGetESP(p)) && + return CmpAddrs(f_src, p_src) && CmpAddrs(f_dst, p_dst) && CmpFlowMisc(f, p) && + f->esp.spi == ESP_GET_SPI(PacketGetESP(p)) && (f->livedev == p->livedev || g_livedev_mask == 0); } From 591a64ff1926874636830188dec1c146bc9b5636 Mon Sep 17 00:00:00 2001 From: Philippe Antoine Date: Thu, 15 May 2025 14:15:07 +0200 Subject: [PATCH 06/11] flow: adds a tunnel identifier Ticket: 7674 To distinguish flows with the same 5-tuple but coming from different tunnel sources. --- doc/userguide/configuration/suricata-yaml.rst | 20 +++ etc/schema.json | 8 ++ rust/src/conf.rs | 42 ++++++ rust/src/decode.rs | 134 ++++++++++++++++++ rust/src/lib.rs | 2 + rust/sys/src/sys.rs | 20 +++ src/bindgen.h | 4 + src/decode-erspan.c | 2 +- src/decode-vxlan.c | 16 ++- src/decode.c | 26 +++- src/decode.h | 49 ++++--- src/flow-hash.c | 41 +++--- src/flow-util.c | 1 + src/flow.h | 2 + src/output-json-alert.c | 3 + src/output-json-flow.c | 3 + src/output-json.c | 4 +- suricata.yaml.in | 8 ++ 18 files changed, 335 insertions(+), 50 deletions(-) create mode 100644 rust/src/decode.rs diff --git a/doc/userguide/configuration/suricata-yaml.rst b/doc/userguide/configuration/suricata-yaml.rst index 94fa3bb488ca..0922dade725e 100644 --- a/doc/userguide/configuration/suricata-yaml.rst +++ b/doc/userguide/configuration/suricata-yaml.rst @@ -3025,6 +3025,26 @@ default. Using this default setting, flows will be associated only if the compared packet headers are encapsulated in the same number of headers. +Tunnels +~~~~~~~ + +If your packet sources are multiple tunnels encapsulating the traffic, +you can configure the ``decoder.tunnels`` section to assign a tunnel +identifier to each of these tunnels. 
+ +These tunnel identifiers are used in flow hashing to distinguish +same-looking flows (same 5-tuple) coming from different tunnels, since each tunnel +is in fact a different subnetwork (as with a VLAN identifier). + +This section is a list of tunnels with the following parameters: +:: + + - id: 1 + type: erspan2 # or vxlan + src: 192.168.1.1 + dst: 192.168.1.3 + session: 123 # erspan span id or vxlan vni + Advanced Options ---------------- diff --git a/etc/schema.json b/etc/schema.json index 5024cd627d79..86eb46378ee1 100644 --- a/etc/schema.json +++ b/etc/schema.json @@ -7839,9 +7839,17 @@ }, "src_port": { "type": "integer" + }, + "tunnel_id": { + "type": "integer", + "description": "If any, the tunnel identifier defined in suricata.yaml decoder.tunnels section" } } }, + "tunnel_id": { + "type": "integer", + "description": "if any, the tunnel identifier defined in suricata.yaml decoder.tunnels section" + }, "tx_guessed": { "type": "boolean", "description": diff --git a/rust/src/conf.rs b/rust/src/conf.rs index 944302b6396b..31fb1ef405ea 100644 --- a/rust/src/conf.rs +++ b/rust/src/conf.rs @@ -29,6 +29,7 @@ use std::os::raw::c_char; use std::os::raw::c_int; use std::ptr; use std::str; +use std::str::FromStr; use suricata_sys::sys::SCConfGet; use suricata_sys::sys::SCConfGetChildValue; use suricata_sys::sys::SCConfGetChildValueBool; @@ -87,15 +88,49 @@ pub fn conf_get_bool(key: &str) -> bool { /// Wrap a Suricata ConfNode and expose some of its methods with a /// Rust friendly interface. 
+#[derive(Copy, Clone)] pub struct ConfNode { pub conf: *const SCConfNode, } +pub(crate) struct ConfNodeIter { + node: ConfNode, + start: bool, +} + +impl Iterator for ConfNodeIter { + type Item = ConfNode; + + fn next(&mut self) -> Option { + let r = if self.start { + self.start = false; + self.node.first() + } else { + self.node.next() + }; + if let Some(n) = r { + self.node = n; + return Some(self.node); + } + r + } +} + impl ConfNode { pub fn wrap(conf: *const SCConfNode) -> Self { return Self { conf }; } + // Return the value of key as T like u16. + pub(crate) fn get_child_from(&self, key: &str) -> Option { + if let Some(n) = self.get_child_node(key) { + if let Ok(r) = T::from_str(n.value()) { + return Some(r); + } + } + return None; + } + pub fn get_child_node(&self, key: &str) -> Option { let node = unsafe { let s = CString::new(key).unwrap(); @@ -108,6 +143,13 @@ impl ConfNode { } } + pub(crate) fn iter(&self) -> ConfNodeIter { + ConfNodeIter { + node: *self, + start: true, + } + } + pub fn first(&self) -> Option { let node = unsafe { SCConfGetFirstNode(self.conf) }; if node.is_null() { diff --git a/rust/src/decode.rs b/rust/src/decode.rs new file mode 100644 index 000000000000..555c5cbc8af2 --- /dev/null +++ b/rust/src/decode.rs @@ -0,0 +1,134 @@ +/* Copyright (C) 2025 Open Information Security Foundation + * + * You can copy, redistribute or modify this Program under the terms of + * the GNU General Public License version 2 as published by the Free + * Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. 
+ */ + +//! Decode module. + +use crate::conf::conf_get_node; +use std::collections::HashMap; +use std::net::Ipv4Addr; +use suricata_sys::sys::SCPacketTunnelType; + +pub const PKT_TUNNEL_UNKNOWN: u16 = u16::MAX; + +fn decoder_tunnel_type(s: Option<&str>) -> Option { + return match s { + Some("erspan2") => Some(SCPacketTunnelType::DECODE_TUNNEL_ERSPANII as u8), + Some("vxlan") => Some(SCPacketTunnelType::DECODE_TUNNEL_VXLAN as u8), + _ => None, + }; +} + +fn decoder_ipv4(o: Option<&str>) -> Option { + if let Some(s) = o { + if let Ok(i) = s.parse::() { + return Some(u32::from_be_bytes(i.octets())); + } + } + return None; +} + +#[repr(C)] +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub struct flowtunnel_keys { + src: u32, + dst: u32, + // would be nice to have this field u24, like C __u32 session : 24; __u8 tunnel : 8; + session: u32, // erspan spanid or vxlan vni + tunnel_type: u8, +} + +#[no_mangle] +pub unsafe extern "C" fn DecodeTunnelsConfig() -> *mut HashMap { + let mut r = HashMap::new(); + if let Some(n) = conf_get_node("decoder.tunnels") { + for nodeu in n.iter() { + // Get all the fields with their right types + let nid = nodeu.get_child_from::("id"); + if nid.is_none() { + SCLogWarning!("missing id for decoder tunnel"); + continue; + } + let nid = nid.unwrap(); + + let ntype = nodeu.get_child_value("type"); + let tunnel_type = decoder_tunnel_type(ntype); + if tunnel_type.is_none() { + SCLogWarning!("unknown type for decoder tunnel {:?}", ntype); + continue; + } + let tunnel_type = tunnel_type.unwrap(); + + let session = nodeu.get_child_from::("session"); + if session.is_none() { + SCLogWarning!("missing session for decoder tunnel"); + continue; + } + let session = session.unwrap(); + + let nsrc = nodeu.get_child_value("src"); + let src = decoder_ipv4(nsrc); + if src.is_none() { + SCLogWarning!("invalid ipv4 src for decoder tunnel {:?}", nsrc); + continue; + } + let src = src.unwrap(); + + let ndst = nodeu.get_child_value("dst"); + let dst = 
decoder_ipv4(ndst); + if dst.is_none() { + SCLogWarning!("invalid ipv4 src for decoder tunnel {:?}", ndst); + continue; + } + let dst = dst.unwrap(); + + // Finally insert the tunnel in the map + let k = flowtunnel_keys { + src, + dst, + session, + tunnel_type, + }; + r.insert(k, nid); + } + } else { + // no decoder.tunnels section in conf + return std::ptr::null_mut(); + } + return Box::into_raw(Box::new(r)); +} + +#[no_mangle] +pub unsafe extern "C" fn DecodeTunnelsFree(map: *mut HashMap) { + if !map.is_null() { + let _ = Box::from_raw(map); // Automatically dropped at end of scope + } +} + +#[no_mangle] +pub unsafe extern "C" fn DecodeTunnelsId( + map: *mut HashMap, key: flowtunnel_keys, +) -> u16 { + if map.is_null() { + return PKT_TUNNEL_UNKNOWN; + } + + let map = &*map; + match map.get(&key) { + Some(value) => *value, + None => PKT_TUNNEL_UNKNOWN, + } +} diff --git a/rust/src/lib.rs b/rust/src/lib.rs index bb7fe6e9fdf6..fadcc0d5e979 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -82,6 +82,8 @@ pub mod core; #[macro_use] pub mod debug; +pub mod decode; + pub mod common; pub mod conf; pub mod jsonbuilder; diff --git a/rust/sys/src/sys.rs b/rust/sys/src/sys.rs index 1a0748abb8ba..df463a73ddc2 100644 --- a/rust/sys/src/sys.rs +++ b/rust/sys/src/sys.rs @@ -418,6 +418,26 @@ extern "C" { extern "C" { pub fn SCLogGetLogLevel() -> SCLogLevel; } +#[repr(u32)] +#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] +pub enum SCPacketTunnelType { + PacketTunnelNone = 0, + PacketTunnelRoot = 1, + PacketTunnelChild = 2, + DECODE_TUNNEL_ETHERNET = 3, + DECODE_TUNNEL_ERSPANII = 4, + DECODE_TUNNEL_ERSPANI = 5, + DECODE_TUNNEL_VXLAN = 6, + DECODE_TUNNEL_VLAN = 7, + DECODE_TUNNEL_IPV4 = 8, + DECODE_TUNNEL_IPV6 = 9, + #[doc = "< separate protocol for stricter error handling"] + DECODE_TUNNEL_IPV6_TEREDO = 10, + DECODE_TUNNEL_PPP = 11, + DECODE_TUNNEL_NSH = 12, + DECODE_TUNNEL_ARP = 13, + DECODE_TUNNEL_UNSET = 14, +} #[doc = " Structure of a configuration parameter."] 
#[repr(C)] #[derive(Debug, Copy, Clone)] diff --git a/src/bindgen.h b/src/bindgen.h index d11078523795..b126ed39fb65 100644 --- a/src/bindgen.h +++ b/src/bindgen.h @@ -33,10 +33,12 @@ #include "app-layer-protos.h" #include "suricata-plugin.h" + // do not export struct fields only used for debug validation // do this after suricata-plugin.h which needs autoconf.h to define SC_PACKAGE_VERSION #undef DEBUG_VALIDATION #include "output-eve-bindgen.h" + #include "detect-engine-register.h" #include "detect-engine-buffer.h" #include "detect-engine-helper.h" @@ -44,6 +46,8 @@ #include "util-debug.h" +#include "decode.h" + #include "conf.h" #include "app-layer-detect-proto.h" diff --git a/src/decode-erspan.c b/src/decode-erspan.c index 0b1a37425b12..ffb28ed846d8 100644 --- a/src/decode-erspan.c +++ b/src/decode-erspan.c @@ -107,7 +107,7 @@ int DecodeERSPAN(ThreadVars *tv, DecodeThreadVars *dtv, Packet *p, const uint8_t p->vlan_id[p->vlan_idx] = vlan_id; p->vlan_idx++; } - + PacketGetTunnelId(p, SCNtohs(ehdr->flags_spanid) & 0x03ff); return DecodeEthernet(tv, dtv, p, pkt + sizeof(ErspanHdr), len - sizeof(ErspanHdr)); } diff --git a/src/decode-vxlan.c b/src/decode-vxlan.c index b3096f4edf71..d365077b0d9e 100644 --- a/src/decode-vxlan.c +++ b/src/decode-vxlan.c @@ -116,6 +116,17 @@ void DecodeVXLANConfig(void) } } +// Just get the tunnel id (for the flow hash) +// Then pass on to ethernet decoder on next layer +int DecodeVXLANtunnel( + ThreadVars *tv, DecodeThreadVars *dtv, Packet *p, const uint8_t *pkt, uint32_t len) +{ + const VXLANHeader *vxlanh = (const VXLANHeader *)pkt; + uint32_t vni = (vxlanh->vni[0] << 16) + (vxlanh->vni[1] << 8) + (vxlanh->vni[2]); + PacketGetTunnelId(p, vni); + return DecodeEthernet(tv, dtv, p, pkt + VXLAN_HEADER_LEN, len - VXLAN_HEADER_LEN); +} + /** \param pkt payload data directly above UDP header * \param len length in bytes of pkt */ @@ -179,8 +190,9 @@ int DecodeVXLAN(ThreadVars *tv, DecodeThreadVars *dtv, Packet *p, /* Set-up and process inner 
packet if it is a supported ethertype */ if (eth_ok) { - Packet *tp = PacketTunnelPktSetup( - tv, dtv, p, pkt + VXLAN_HEADER_LEN, len - VXLAN_HEADER_LEN, DECODE_TUNNEL_VXLAN); + // do not advance in packet, as we will need child packet to read vxlan header + // to compute its tunnel_id before computing its hash + Packet *tp = PacketTunnelPktSetup(tv, dtv, p, pkt, len, DECODE_TUNNEL_VXLAN); if (tp != NULL) { PKT_SET_SRC(tp, PKT_SRC_DECODER_VXLAN); PacketEnqueueNoLock(&tv->decode_pq, tp); diff --git a/src/decode.c b/src/decode.c index 76bd0a76660c..99852bac5f6c 100644 --- a/src/decode.c +++ b/src/decode.c @@ -179,10 +179,10 @@ void PacketAlertFree(PacketAlert *pa_array) } static int DecodeTunnel(ThreadVars *, DecodeThreadVars *, Packet *, const uint8_t *, uint32_t, - enum PacketTunnelType) WARN_UNUSED; + enum SCPacketTunnelType) WARN_UNUSED; static int DecodeTunnel(ThreadVars *tv, DecodeThreadVars *dtv, Packet *p, const uint8_t *pkt, - uint32_t len, enum PacketTunnelType proto) + uint32_t len, enum SCPacketTunnelType proto) { switch (proto) { case DECODE_TUNNEL_PPP: @@ -203,7 +203,7 @@ static int DecodeTunnel(ThreadVars *tv, DecodeThreadVars *dtv, Packet *p, const case DECODE_TUNNEL_ERSPANI: return DecodeERSPANTypeI(tv, dtv, p, pkt, len); case DECODE_TUNNEL_VXLAN: - return DecodeEthernet(tv, dtv, p, pkt, len); + return DecodeVXLANtunnel(tv, dtv, p, pkt, len); case DECODE_TUNNEL_NSH: return DecodeNSH(tv, dtv, p, pkt, len); case DECODE_TUNNEL_ARP: @@ -382,6 +382,22 @@ inline int PacketCopyData(Packet *p, const uint8_t *pktdata, uint32_t pktlen) return PacketCopyDataOffset(p, 0, pktdata, pktlen); } +static void *decode_tunnels_map; + +void PacketGetTunnelId(Packet *p, uint32_t session) +{ + if (decode_tunnels_map == NULL || p->root == NULL || !PacketIsIPv4(p->root)) { + p->tunnel_id = PKT_TUNNEL_UNKNOWN; + return; + } + struct flowtunnel_keys k = {}; + k.src = htonl(GET_IPV4_SRC_ADDR_U32(p->root)); + k.dst = htonl(GET_IPV4_DST_ADDR_U32(p->root)); + k.session = session; 
+ k.tunnel_type = (uint8_t)p->ttype; + p->tunnel_id = DecodeTunnelsId(decode_tunnels_map, k); +} + /** * \brief Setup a pseudo packet (tunnel) * @@ -393,7 +409,7 @@ inline int PacketCopyData(Packet *p, const uint8_t *pktdata, uint32_t pktlen) * \retval p the pseudo packet or NULL if out of memory */ Packet *PacketTunnelPktSetup(ThreadVars *tv, DecodeThreadVars *dtv, Packet *parent, - const uint8_t *pkt, uint32_t len, enum PacketTunnelType proto) + const uint8_t *pkt, uint32_t len, enum SCPacketTunnelType proto) { int ret; @@ -609,6 +625,7 @@ void DecodeUnregisterCounters(void) g_counter_table = NULL; } SCMutexUnlock(&g_counter_table_mutex); + DecodeTunnelsFree(decode_tunnels_map); } static bool IsDefragMemcapExceptionPolicyStatsValid(enum ExceptionPolicy policy) @@ -1055,6 +1072,7 @@ void DecodeGlobalConfig(void) DecodeGeneveConfig(); DecodeVXLANConfig(); DecodeERSPANConfig(); + decode_tunnels_map = DecodeTunnelsConfig(); intmax_t value = 0; if (SCConfGetInt("decoder.max-layers", &value) == 1) { if (value < 0 || value > UINT8_MAX) { diff --git a/src/decode.h b/src/decode.h index f6ff645a0242..15530f32d456 100644 --- a/src/decode.h +++ b/src/decode.h @@ -24,6 +24,25 @@ #ifndef SURICATA_DECODE_H #define SURICATA_DECODE_H +enum SCPacketTunnelType { + PacketTunnelNone, + PacketTunnelRoot, + PacketTunnelChild, + DECODE_TUNNEL_ETHERNET, + DECODE_TUNNEL_ERSPANII, + DECODE_TUNNEL_ERSPANI, + DECODE_TUNNEL_VXLAN, + DECODE_TUNNEL_VLAN, + DECODE_TUNNEL_IPV4, + DECODE_TUNNEL_IPV6, + DECODE_TUNNEL_IPV6_TEREDO, /**< separate protocol for stricter error handling */ + DECODE_TUNNEL_PPP, + DECODE_TUNNEL_NSH, + DECODE_TUNNEL_ARP, + DECODE_TUNNEL_UNSET +}; + +#ifndef SURICATA_BINDGEN_H //#define DBG_THREADS #define COUNTERS @@ -402,24 +421,6 @@ enum PacketDropReason { PKT_DROP_REASON_MAX, }; -enum PacketTunnelType { - PacketTunnelNone, - PacketTunnelRoot, - PacketTunnelChild, - DECODE_TUNNEL_ETHERNET, - DECODE_TUNNEL_ERSPANII, - DECODE_TUNNEL_ERSPANI, - DECODE_TUNNEL_VXLAN, - 
DECODE_TUNNEL_VLAN, - DECODE_TUNNEL_IPV4, - DECODE_TUNNEL_IPV6, - DECODE_TUNNEL_IPV6_TEREDO, /**< separate protocol for stricter error handling */ - DECODE_TUNNEL_PPP, - DECODE_TUNNEL_NSH, - DECODE_TUNNEL_ARP, - DECODE_TUNNEL_UNSET -}; - /* forward declaration since Packet struct definition requires this */ struct PacketQueue_; @@ -537,6 +538,11 @@ typedef struct Packet_ * has the exact same tuple as the lower levels */ uint8_t recursion_level; + /* tunnel id if any, PKT_TUNNEL_UNKNOWN if unknown, 0 if none + * tunnel ids are configured in suricata.yaml decoder.tunnels section + */ + uint16_t tunnel_id; + uint16_t vlan_id[VLAN_MAX_LAYERS]; uint8_t vlan_idx; @@ -562,7 +568,7 @@ typedef struct Packet_ uint32_t flow_hash; /* tunnel type: none, root or child */ - enum PacketTunnelType ttype; + enum SCPacketTunnelType ttype; SCTime_t ts; @@ -1111,7 +1117,8 @@ static inline void PacketTunnelSetVerdicted(Packet *p) } Packet *PacketTunnelPktSetup(ThreadVars *tv, DecodeThreadVars *dtv, Packet *parent, - const uint8_t *pkt, uint32_t len, enum PacketTunnelType proto); + const uint8_t *pkt, uint32_t len, enum SCPacketTunnelType proto); +void PacketGetTunnelId(Packet *p, uint32_t session); Packet *PacketDefragPktSetup(Packet *parent, const uint8_t *pkt, uint32_t len, uint8_t proto); void PacketDefragPktSetupParent(Packet *parent); void DecodeRegisterPerfCounters(DecodeThreadVars *, ThreadVars *); @@ -1159,6 +1166,7 @@ int DecodeVNTag(ThreadVars *, DecodeThreadVars *, Packet *, const uint8_t *, uin int DecodeIEEE8021ah(ThreadVars *, DecodeThreadVars *, Packet *, const uint8_t *, uint32_t); int DecodeGeneve(ThreadVars *, DecodeThreadVars *, Packet *, const uint8_t *, uint32_t); int DecodeVXLAN(ThreadVars *, DecodeThreadVars *, Packet *, const uint8_t *, uint32_t); +int DecodeVXLANtunnel(ThreadVars *, DecodeThreadVars *, Packet *, const uint8_t *, uint32_t); int DecodeMPLS(ThreadVars *, DecodeThreadVars *, Packet *, const uint8_t *, uint32_t); int DecodeERSPAN(ThreadVars *, 
DecodeThreadVars *, Packet *, const uint8_t *, uint32_t); int DecodeERSPANTypeI(ThreadVars *, DecodeThreadVars *, Packet *, const uint8_t *, uint32_t); @@ -1511,5 +1519,6 @@ static inline bool DecodeNetworkLayer(ThreadVars *tv, DecodeThreadVars *dtv, } return true; } +#endif // SURICATA_BINDGEN_H #endif /* SURICATA_DECODE_H */ diff --git a/src/flow-hash.c b/src/flow-hash.c index 82de959c1887..884e05447b86 100644 --- a/src/flow-hash.c +++ b/src/flow-hash.c @@ -93,7 +93,7 @@ typedef struct FlowHashKey4_ { uint8_t recur; uint16_t livedev; uint16_t vlan_id[VLAN_MAX_LAYERS]; - uint16_t pad[1]; + uint16_t tunnel_id; }; const uint32_t u32[6]; }; @@ -108,7 +108,7 @@ typedef struct FlowHashKey6_ { uint8_t recur; uint16_t livedev; uint16_t vlan_id[VLAN_MAX_LAYERS]; - uint16_t pad[1]; + uint16_t tunnel_id; }; const uint32_t u32[12]; }; @@ -130,6 +130,7 @@ static inline void FlowHashIp4Fill(FlowHashKey4 *fhk, const Packet *p) fhk->vlan_id[0] = p->vlan_id[0] & g_vlan_mask; fhk->vlan_id[1] = p->vlan_id[1] & g_vlan_mask; fhk->vlan_id[2] = p->vlan_id[2] & g_vlan_mask; + fhk->tunnel_id = p->tunnel_id; } static inline void FlowHashIp6Fill(FlowHashKey6 *fhk, const Packet *p) @@ -141,15 +142,14 @@ static inline void FlowHashIp6Fill(FlowHashKey6 *fhk, const Packet *p) fhk->vlan_id[0] = p->vlan_id[0] & g_vlan_mask; fhk->vlan_id[1] = p->vlan_id[1] & g_vlan_mask; fhk->vlan_id[2] = p->vlan_id[2] & g_vlan_mask; + fhk->tunnel_id = p->tunnel_id; } uint32_t FlowGetIpPairProtoHash(const Packet *p) { uint32_t hash = 0; if (PacketIsIPv4(p)) { - FlowHashKey4 fhk = { - .pad[0] = 0, - }; + FlowHashKey4 fhk = {}; int ai = (p->src.addr_data32[0] > p->dst.addr_data32[0]); fhk.addrs[1 - ai] = p->src.addr_data32[0]; @@ -162,9 +162,7 @@ uint32_t FlowGetIpPairProtoHash(const Packet *p) hash = hashword(fhk.u32, ARRAY_SIZE(fhk.u32), flow_config.hash_rand); } else if (PacketIsIPv6(p)) { - FlowHashKey6 fhk = { - .pad[0] = 0, - }; + FlowHashKey6 fhk = {}; if (FlowHashRawAddressIPv6GtU32(p->src.addr_data32, 
p->dst.addr_data32)) { fhk.src[0] = p->src.addr_data32[0]; fhk.src[1] = p->src.addr_data32[1]; @@ -214,7 +212,7 @@ static inline uint32_t FlowGetHash(const Packet *p) if (PacketIsIPv4(p)) { if (PacketIsTCP(p) || PacketIsUDP(p)) { - FlowHashKey4 fhk = { .pad[0] = 0 }; + FlowHashKey4 fhk = {}; int ai = (p->src.addr_data32[0] > p->dst.addr_data32[0]); fhk.addrs[1-ai] = p->src.addr_data32[0]; @@ -231,7 +229,7 @@ static inline uint32_t FlowGetHash(const Packet *p) } else if (ICMPV4_DEST_UNREACH_IS_VALID(p)) { uint32_t psrc = IPV4_GET_RAW_IPSRC_U32(PacketGetICMPv4EmbIPv4(p)); uint32_t pdst = IPV4_GET_RAW_IPDST_U32(PacketGetICMPv4EmbIPv4(p)); - FlowHashKey4 fhk = { .pad[0] = 0 }; + FlowHashKey4 fhk = {}; const int ai = (psrc > pdst); fhk.addrs[1-ai] = psrc; @@ -247,7 +245,7 @@ static inline uint32_t FlowGetHash(const Packet *p) hash = hashword(fhk.u32, ARRAY_SIZE(fhk.u32), flow_config.hash_rand); } else { - FlowHashKey4 fhk = { .pad[0] = 0 }; + FlowHashKey4 fhk = {}; const int ai = (p->src.addr_data32[0] > p->dst.addr_data32[0]); fhk.addrs[1-ai] = p->src.addr_data32[0]; fhk.addrs[ai] = p->dst.addr_data32[0]; @@ -258,7 +256,7 @@ static inline uint32_t FlowGetHash(const Packet *p) hash = hashword(fhk.u32, ARRAY_SIZE(fhk.u32), flow_config.hash_rand); } } else if (PacketIsIPv6(p)) { - FlowHashKey6 fhk = { .pad[0] = 0 }; + FlowHashKey6 fhk = {}; if (FlowHashRawAddressIPv6GtU32(p->src.addr_data32, p->dst.addr_data32)) { fhk.src[0] = p->src.addr_data32[0]; fhk.src[1] = p->src.addr_data32[1]; @@ -303,9 +301,7 @@ uint32_t FlowKeyGetHash(FlowKey *fk) uint32_t hash = 0; if (fk->src.family == AF_INET) { - FlowHashKey4 fhk = { - .pad[0] = 0, - }; + FlowHashKey4 fhk = {}; int ai = (fk->src.address.address_un_data32[0] > fk->dst.address.address_un_data32[0]); fhk.addrs[1-ai] = fk->src.address.address_un_data32[0]; fhk.addrs[ai] = fk->dst.address.address_un_data32[0]; @@ -320,12 +316,11 @@ uint32_t FlowKeyGetHash(FlowKey *fk) fhk.vlan_id[0] = fk->vlan_id[0] & g_vlan_mask; fhk.vlan_id[1] 
= fk->vlan_id[1] & g_vlan_mask; fhk.vlan_id[2] = fk->vlan_id[2] & g_vlan_mask; + fhk.tunnel_id = fk->tunnel_id; hash = hashword(fhk.u32, ARRAY_SIZE(fhk.u32), flow_config.hash_rand); } else { - FlowHashKey6 fhk = { - .pad[0] = 0, - }; + FlowHashKey6 fhk = {}; if (FlowHashRawAddressIPv6GtU32(fk->src.address.address_un_data32, fk->dst.address.address_un_data32)) { fhk.src[0] = fk->src.address.address_un_data32[0]; @@ -356,6 +351,7 @@ uint32_t FlowKeyGetHash(FlowKey *fk) fhk.vlan_id[0] = fk->vlan_id[0] & g_vlan_mask; fhk.vlan_id[1] = fk->vlan_id[1] & g_vlan_mask; fhk.vlan_id[2] = fk->vlan_id[2] & g_vlan_mask; + fhk.tunnel_id = fk->tunnel_id; hash = hashword(fhk.u32, ARRAY_SIZE(fhk.u32), flow_config.hash_rand); } @@ -399,7 +395,8 @@ static inline bool CmpLiveDevIds(const LiveDevice *livedev, const uint16_t id) #define CmpFlowMisc(x, y) \ (((x)->proto == (y)->proto) && \ ((x)->recursion_level == (y)->recursion_level || g_recurlvl_mask == 0) && \ - CmpVlanIds((x)->vlan_id, (y)->vlan_id)) + CmpVlanIds((x)->vlan_id, (y)->vlan_id) && \ + ((x)->tunnel_id == (y)->tunnel_id)) /* Since two or more flows can have the same hash key, we need to compare * the flow with the current packet or flow key.
*/ @@ -469,7 +466,7 @@ static inline int FlowCompareICMPv4(Flow *f, const Packet *p) f->sp == p->l4.vars.icmpv4.emb_sport && f->dp == p->l4.vars.icmpv4.emb_dport && f->proto == ICMPV4_GET_EMB_PROTO(p) && (f->recursion_level == p->recursion_level || g_recurlvl_mask == 0) && - CmpVlanIds(f->vlan_id, p->vlan_id) && + CmpVlanIds(f->vlan_id, p->vlan_id) && f->tunnel_id == p->tunnel_id && (f->livedev == p->livedev || g_livedev_mask == 0)) { return 1; @@ -480,7 +477,7 @@ static inline int FlowCompareICMPv4(Flow *f, const Packet *p) f->dp == p->l4.vars.icmpv4.emb_sport && f->sp == p->l4.vars.icmpv4.emb_dport && f->proto == ICMPV4_GET_EMB_PROTO(p) && (f->recursion_level == p->recursion_level || g_recurlvl_mask == 0) && - CmpVlanIds(f->vlan_id, p->vlan_id) && + CmpVlanIds(f->vlan_id, p->vlan_id) && f->tunnel_id == p->tunnel_id && (f->livedev == p->livedev || g_livedev_mask == 0)) { return 1; } @@ -1099,7 +1096,7 @@ Flow *FlowGetFromFlowKey(FlowKey *key, struct timespec *ttime, const uint32_t ha } f->proto = key->proto; memcpy(&f->vlan_id[0], &key->vlan_id[0], sizeof(f->vlan_id)); - ; + f->tunnel_id = key->tunnel_id; f->src.addr_data32[0] = key->src.addr_data32[0]; f->src.addr_data32[1] = key->src.addr_data32[1]; f->src.addr_data32[2] = key->src.addr_data32[2]; diff --git a/src/flow-util.c b/src/flow-util.c index 05508723a215..336284df531a 100644 --- a/src/flow-util.c +++ b/src/flow-util.c @@ -151,6 +151,7 @@ void FlowInit(ThreadVars *tv, Flow *f, const Packet *p) f->proto = p->proto; f->recursion_level = p->recursion_level; + f->tunnel_id = p->tunnel_id; memcpy(&f->vlan_id[0], &p->vlan_id[0], sizeof(f->vlan_id)); f->vlan_idx = p->vlan_idx; diff --git a/src/flow.h b/src/flow.h index 1a72fce49f49..4170fdce19c5 100644 --- a/src/flow.h +++ b/src/flow.h @@ -305,6 +305,7 @@ typedef struct FlowKey_ uint8_t recursion_level; uint16_t livedev_id; uint16_t vlan_id[VLAN_MAX_LAYERS]; + uint16_t tunnel_id; } FlowKey; typedef struct FlowAddress_ { @@ -369,6 +370,7 @@ typedef struct Flow_ 
}; uint8_t proto; uint8_t recursion_level; + uint16_t tunnel_id; uint16_t vlan_id[VLAN_MAX_LAYERS]; uint8_t vlan_idx; diff --git a/src/output-json-alert.c b/src/output-json-alert.c index 184370a65edc..9dcbfceb1b1f 100644 --- a/src/output-json-alert.c +++ b/src/output-json-alert.c @@ -296,6 +296,9 @@ static void AlertJsonTunnel(const Packet *p, SCJsonBuilder *js) SCJbSetUint(js, "dest_port", addr.dp); SCJbSetString(js, "proto", addr.proto); + if (p->tunnel_id > 0 && p->tunnel_id != PKT_TUNNEL_UNKNOWN) { + SCJbSetUint(js, "tunnel_id", p->tunnel_id); + } SCJbSetUint(js, "depth", p->recursion_level); if (pcap_cnt != 0) { SCJbSetUint(js, "pcap_cnt", pcap_cnt); diff --git a/src/output-json-flow.c b/src/output-json-flow.c index a57160c602b5..ffbff90c6d65 100644 --- a/src/output-json-flow.c +++ b/src/output-json-flow.c @@ -150,6 +150,9 @@ static SCJsonBuilder *CreateEveHeaderFromFlow(const Flow *f) SCJbSetUint(jb, "ip_v", 6); } + if (f->tunnel_id > 0 && f->tunnel_id != PKT_TUNNEL_UNKNOWN) { + SCJbSetUint(jb, "tunnel_id", f->tunnel_id); + } if (SCProtoNameValid(f->proto)) { SCJbSetString(jb, "proto", known_proto[f->proto]); } else { diff --git a/src/output-json.c b/src/output-json.c index 1e04e13c49dd..5467db04d9da 100644 --- a/src/output-json.c +++ b/src/output-json.c @@ -924,7 +924,9 @@ SCJsonBuilder *CreateEveHeader(const Packet *p, enum SCOutputJsonLogDirection di } else if (PacketIsIPv6(p)) { SCJbSetUint(js, "ip_v", 6); } - + if (p->tunnel_id > 0 && p->tunnel_id != PKT_TUNNEL_UNKNOWN) { + SCJbSetUint(js, "tunnel_id", p->tunnel_id); + } /* icmp */ switch (p->proto) { case IPPROTO_ICMP: diff --git a/suricata.yaml.in b/suricata.yaml.in index 9b96a59ea3d5..dc1ca2182475 100644 --- a/suricata.yaml.in +++ b/suricata.yaml.in @@ -1758,6 +1758,14 @@ decoder: # has put the headers on, like when using netmap driver pickup. 
recursion-level: use-for-tracking: true + # If your packet sources are tunnels encapsulating the traffic, + # you can list these tunnels here and assign identifiers to them. + # tunnels: + # - id: 1 + # type: erspan2 + # src: 192.168.1.1 + # dst: 192.168.1.3 + # session: 123 ## ## Performance tuning and profiling From 71d66811784013baa77d3a3716e1f5893fdb921b Mon Sep 17 00:00:00 2001 From: Philippe Antoine Date: Fri, 23 May 2025 14:10:52 +0200 Subject: [PATCH 07/11] ebpf: factorize duplicated code for key setting --- src/source-af-packet.c | 59 +++++++++++------------------------------- 1 file changed, 15 insertions(+), 44 deletions(-) diff --git a/src/source-af-packet.c b/src/source-af-packet.c index 6e636b388f0d..1439709e82fc 100644 --- a/src/source-af-packet.c +++ b/src/source-af-packet.c @@ -2232,6 +2232,13 @@ static int AFPSetFlowStorage(Packet *p, int map_fd, void *key0, void* key1, return 1; } +#define FlowKeyIpFill(k, p) \ + { \ + (k)->ip_proto = ((p)->proto == IPPROTO_TCP) ? 1 : 0; \ + (k)->vlan0 = (p)->vlan_id[0]; \ + (k)->vlan1 = (p)->vlan_id[1]; \ + } + /** * Bypass function for AF_PACKET capture in eBPF mode * @@ -2278,16 +2285,9 @@ static int AFPBypassCallback(Packet *p) } keys[0]->src = htonl(GET_IPV4_SRC_ADDR_U32(p)); keys[0]->dst = htonl(GET_IPV4_DST_ADDR_U32(p)); + FlowKeyIpFill(keys[0], p); keys[0]->port16[0] = p->sp; keys[0]->port16[1] = p->dp; - keys[0]->vlan0 = p->vlan_id[0]; - keys[0]->vlan1 = p->vlan_id[1]; - - if (p->proto == IPPROTO_TCP) { - keys[0]->ip_proto = 1; - } else { - keys[0]->ip_proto = 0; - } if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, keys[0], p->afp_v.nr_cpus) == 0) { LiveDevAddBypassFail(p->livedev, 1, AF_INET); @@ -2305,10 +2305,7 @@ static int AFPBypassCallback(Packet *p) keys[1]->dst = htonl(GET_IPV4_SRC_ADDR_U32(p)); keys[1]->port16[0] = p->dp; keys[1]->port16[1] = p->sp; - keys[1]->vlan0 = p->vlan_id[0]; - keys[1]->vlan1 = p->vlan_id[1]; - - keys[1]->ip_proto = keys[0]->ip_proto; + FlowKeyIpFill(keys[1], p); if
(AFPInsertHalfFlow(p->afp_v.v4_map_fd, keys[1], p->afp_v.nr_cpus) == 0) { EBPFDeleteKey(p->afp_v.v4_map_fd, keys[0]); @@ -2339,14 +2336,7 @@ static int AFPBypassCallback(Packet *p) } keys[0]->port16[0] = p->sp; keys[0]->port16[1] = p->dp; - keys[0]->vlan0 = p->vlan_id[0]; - keys[0]->vlan1 = p->vlan_id[1]; - - if (p->proto == IPPROTO_TCP) { - keys[0]->ip_proto = 1; - } else { - keys[0]->ip_proto = 0; - } + FlowKeyIpFill(keys[0], p); if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, keys[0], p->afp_v.nr_cpus) == 0) { LiveDevAddBypassFail(p->livedev, 1, AF_INET6); @@ -2366,10 +2356,7 @@ static int AFPBypassCallback(Packet *p) } keys[1]->port16[0] = p->dp; keys[1]->port16[1] = p->sp; - keys[1]->vlan0 = p->vlan_id[0]; - keys[1]->vlan1 = p->vlan_id[1]; - - keys[1]->ip_proto = keys[0]->ip_proto; + FlowKeyIpFill(keys[1], p); if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, keys[1], p->afp_v.nr_cpus) == 0) { EBPFDeleteKey(p->afp_v.v6_map_fd, keys[0]); @@ -2433,13 +2420,7 @@ static int AFPXDPBypassCallback(Packet *p) * (as in eBPF filter) so we need to pass from host to network order */ keys[0]->port16[0] = htons(p->sp); keys[0]->port16[1] = htons(p->dp); - keys[0]->vlan0 = p->vlan_id[0]; - keys[0]->vlan1 = p->vlan_id[1]; - if (p->proto == IPPROTO_TCP) { - keys[0]->ip_proto = 1; - } else { - keys[0]->ip_proto = 0; - } + FlowKeyIpFill(keys[0], p); if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, keys[0], p->afp_v.nr_cpus) == 0) { LiveDevAddBypassFail(p->livedev, 1, AF_INET); @@ -2457,9 +2438,7 @@ static int AFPXDPBypassCallback(Packet *p) keys[1]->dst = p->src.addr_data32[0]; keys[1]->port16[0] = htons(p->dp); keys[1]->port16[1] = htons(p->sp); - keys[1]->vlan0 = p->vlan_id[0]; - keys[1]->vlan1 = p->vlan_id[1]; - keys[1]->ip_proto = keys[0]->ip_proto; + FlowKeyIpFill(keys[1], p); if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, keys[1], p->afp_v.nr_cpus) == 0) { EBPFDeleteKey(p->afp_v.v4_map_fd, keys[0]); @@ -2489,13 +2468,7 @@ static int AFPXDPBypassCallback(Packet *p) } keys[0]->port16[0] = 
htons(p->sp); keys[0]->port16[1] = htons(p->dp); - keys[0]->vlan0 = p->vlan_id[0]; - keys[0]->vlan1 = p->vlan_id[1]; - if (p->proto == IPPROTO_TCP) { - keys[0]->ip_proto = 1; - } else { - keys[0]->ip_proto = 0; - } + FlowKeyIpFill(keys[0], p); if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, keys[0], p->afp_v.nr_cpus) == 0) { LiveDevAddBypassFail(p->livedev, 1, AF_INET6); @@ -2515,9 +2488,7 @@ static int AFPXDPBypassCallback(Packet *p) } keys[1]->port16[0] = htons(p->dp); keys[1]->port16[1] = htons(p->sp); - keys[1]->vlan0 = p->vlan_id[0]; - keys[1]->vlan1 = p->vlan_id[1]; - keys[1]->ip_proto = keys[0]->ip_proto; + FlowKeyIpFill(keys[1], p); if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, keys[1], p->afp_v.nr_cpus) == 0) { EBPFDeleteKey(p->afp_v.v6_map_fd, keys[0]); From 3b65d4c313705074629b21a481eb84a86ab4986c Mon Sep 17 00:00:00 2001 From: Philippe Antoine Date: Thu, 24 Apr 2025 10:25:01 +0200 Subject: [PATCH 08/11] xdp: handle erspan2 tunnels Ticket: 7674 --- ebpf/xdp_filter.c | 165 ++++++++++++++++++++++++++++++++++++++-- rust/src/decode.rs | 45 +++++++++++ src/decode.c | 10 ++- src/decode.h | 2 + src/runmode-af-packet.c | 1 + src/source-af-packet.c | 36 ++++++--- src/source-af-packet.h | 5 ++ src/util-ebpf.c | 64 +++++++++++++++- src/util-ebpf.h | 18 ++++- 9 files changed, 326 insertions(+), 20 deletions(-) diff --git a/ebpf/xdp_filter.c b/ebpf/xdp_filter.c index 6c5e8b644aec..99f221537131 100644 --- a/ebpf/xdp_filter.c +++ b/ebpf/xdp_filter.c @@ -23,6 +23,10 @@ #include #include #include +/* Workaround to avoid the need of 32bit headers */ +#define _LINUX_IF_H +#define IFNAMSIZ 16 +#include #include #include #include @@ -66,6 +70,13 @@ struct vlan_hdr { __u16 h_vlan_encapsulated_proto; }; +struct flowtunnel_keys { + __u32 src; + __u32 dst; + __u32 session : 24; + __u8 tunnel : 8; +}; + struct flowv4_keys { __u32 src; __u32 dst; @@ -75,7 +86,8 @@ struct flowv4_keys { }; __u8 ip_proto:1; __u16 vlan0:15; - __u16 vlan1; + __u8 tunnel : 1; + __u16 vlan1_or_tunnel_id : 15; }; 
struct flowv6_keys { @@ -87,7 +99,8 @@ struct flowv6_keys { }; __u8 ip_proto:1; __u16 vlan0:15; - __u16 vlan1; + __u8 tunnel : 1; + __u16 vlan1_or_tunnel_id : 15; }; struct pair { @@ -95,6 +108,21 @@ struct pair { __u64 bytes; }; +struct flowtunnel_id { + __u16 tunnel_id; +}; + +struct { +#if USE_PERCPU_HASH + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); +#else + __uint(type, BPF_MAP_TYPE_HASH); +#endif + __type(key, struct flowtunnel_keys); + __type(value, struct flowtunnel_id); + __uint(max_entries, 256); +} flow_table_tunnels SEC(".maps"); + struct { #if USE_PERCPU_HASH __uint(type, BPF_MAP_TYPE_PERCPU_HASH); @@ -232,7 +260,8 @@ static __always_inline int get_dport(void *trans_data, void *data_end, } } -static int __always_inline filter_ipv4(struct xdp_md *ctx, void *data, __u64 nh_off, void *data_end, __u16 vlan0, __u16 vlan1) +static int __always_inline filter_ipv4_final( + struct xdp_md *ctx, void *data, __u64 nh_off, void *data_end, __u16 vlan0, __u16 vlan1) { struct iphdr *iph = data + nh_off; int dport; @@ -280,7 +309,8 @@ static int __always_inline filter_ipv4(struct xdp_md *ctx, void *data, __u64 nh_ tuple.port16[1] = (__u16)dport; tuple.vlan0 = vlan0; - tuple.vlan1 = vlan1; + tuple.tunnel = (vlan1 & 0x8000) != 0; + tuple.vlan1_or_tunnel_id = vlan1 & 0x7FFF; value = bpf_map_lookup_elem(&flow_table_v4, &tuple); #if 0 @@ -421,7 +451,8 @@ static int __always_inline filter_ipv6(struct xdp_md *ctx, void *data, __u64 nh_ tuple.port16[1] = dport; tuple.vlan0 = vlan0; - tuple.vlan1 = vlan1; + tuple.tunnel = (vlan1 & 0x8000) != 0; + tuple.vlan1_or_tunnel_id = vlan1 & 0x7FFF; value = bpf_map_lookup_elem(&flow_table_v6, &tuple); if (value) { @@ -482,6 +513,130 @@ static int __always_inline filter_ipv6(struct xdp_md *ctx, void *data, __u64 nh_ #endif } +static int __always_inline filter_erspan( + struct xdp_md *ctx, void *data, __u64 nh_off, void *data_end, struct iphdr *iph) +{ + struct erspan_hdr { + __be16 ver_vlan; + __be16 flags_spanid; + __be32 padding; + }; + 
__u16 vlan0 = 0; + __u16 vlan1 = 0; + struct flowtunnel_keys tuple; + __u16 h_proto; + __u16 flags_spanid; + struct flowtunnel_id *value; + + struct erspan_hdr *erhdr = (struct erspan_hdr *)(data + nh_off); + if ((void *)(erhdr + 1) > data_end) + return XDP_PASS; + + if ((erhdr->ver_vlan & 0xF0) != 0x10) { + // only handle ERSPAN 2 + return XDP_PASS; + } + flags_spanid = erhdr->flags_spanid; + if ((flags_spanid & 0x1800) == 0x800) { + // do not handle ISL encapsulated + return XDP_PASS; + } + + tuple.tunnel = 4; // DECODE_TUNNEL_ERSPANII + tuple.src = iph->saddr; + tuple.dst = iph->daddr; + tuple.session = flags_spanid & 0x3FF; + value = bpf_map_lookup_elem(&flow_table_tunnels, &tuple); + if (!value) { + // unknown tunnel + return XDP_PASS; + } + vlan1 = 0x8000 | value->tunnel_id; + + nh_off += 8; + if (data + nh_off + sizeof(struct ethhdr) > data_end) + return XDP_PASS; + + struct ethhdr *eth = data + nh_off; + nh_off += sizeof(*eth); + + h_proto = eth->h_proto; + +#if VLAN_TRACKING + if ((flags_spanid & 0x1800) == 0x1000) { + vlan0 = erhdr->ver_vlan & 0xFFF; + } +#endif + if ((flags_spanid & 0x1800) == 0x1800 && (h_proto == __constant_htons(ETH_P_8021Q) || + h_proto == __constant_htons(ETH_P_8021AD))) { + struct vlan_hdr *vhdr; + + if (data + nh_off + sizeof(struct vlan_hdr) > data_end) + return XDP_PASS; + vhdr = data + nh_off; + nh_off += sizeof(struct vlan_hdr); + h_proto = vhdr->h_vlan_encapsulated_proto; +#if VLAN_TRACKING + vlan0 = vhdr->h_vlan_TCI & 0x0fff; +#endif + } + if (h_proto == __constant_htons(ETH_P_IP)) + return filter_ipv4_final(ctx, data, nh_off, data_end, vlan0, vlan1); + else if (h_proto == __constant_htons(ETH_P_IPV6)) + return filter_ipv6(ctx, data, nh_off, data_end, vlan0, vlan1); + return XDP_PASS; +} + +static int __always_inline filter_gre( + struct xdp_md *ctx, void *data, __u64 nh_off, void *data_end, struct iphdr *iph) +{ + struct gre_hdr { + __be16 flags; + __be16 proto; + }; + __u16 proto; + + struct gre_hdr *grhdr = (struct 
gre_hdr *)(data + nh_off); + + if ((void *)(grhdr + 1) > data_end) + return XDP_PASS; + + // only GRE version 0 without routing + if (grhdr->flags & (GRE_VERSION | GRE_ROUTING)) + return XDP_PASS; + + nh_off += 4; + if (grhdr->flags & GRE_CSUM) + nh_off += 4; + if (grhdr->flags & GRE_KEY) + nh_off += 4; + if (grhdr->flags & GRE_SEQ) + nh_off += 4; + if (data + nh_off > data_end) + return XDP_PASS; + + proto = grhdr->proto; + // only handle erspan over gre + if (proto == __constant_htons(ETH_P_ERSPAN)) { + return filter_erspan(ctx, data, nh_off, data_end, iph); + } + return XDP_PASS; +} + +static int __always_inline filter_ipv4( + struct xdp_md *ctx, void *data, __u64 nh_off, void *data_end, __u16 vlan0, __u16 vlan1) +{ + struct iphdr *iph = data + nh_off; + if ((void *)(iph + 1) > data_end) + return XDP_PASS; + + if (iph->protocol == IPPROTO_GRE) { + nh_off += sizeof(struct iphdr); + return filter_gre(ctx, data, nh_off, data_end, iph); + } + return filter_ipv4_final(ctx, data, nh_off, data_end, vlan0, vlan1); +} + int SEC("xdp") xdp_hashfilter(struct xdp_md *ctx) { void *data_end = (void *)(long)ctx->data_end; diff --git a/rust/src/decode.rs b/rust/src/decode.rs index 555c5cbc8af2..8d635b93b6ac 100644 --- a/rust/src/decode.rs +++ b/rust/src/decode.rs @@ -17,8 +17,10 @@ //! Decode module. 
+use crate::cast_pointer; use crate::conf::conf_get_node; use std::collections::HashMap; +use std::ffi::c_void; use std::net::Ipv4Addr; use suricata_sys::sys::SCPacketTunnelType; @@ -132,3 +134,46 @@ pub unsafe extern "C" fn DecodeTunnelsId( None => PKT_TUNNEL_UNKNOWN, } } + +#[no_mangle] +pub unsafe extern "C" fn DecodeTunnelsIterStart( + map: *mut HashMap, +) -> *mut c_void { + if map.is_null() { + return std::ptr::null_mut(); + } + let map = &*map; + return Box::into_raw(Box::new(map.iter())) as *mut _; +} + +#[no_mangle] +pub unsafe extern "C" fn DecodeTunnelsMapIter( + iter: *mut c_void, key: &mut flowtunnel_keys, value: &mut u16, +) -> bool { + if iter.is_null() { + return false; + } + let iter = cast_pointer!( + iter, + std::collections::hash_map::Iter<'static, flowtunnel_keys, u16> + ); + match iter.next() { + Some((k, v)) => { + *key = k.clone(); + *value = *v; + return true; + } + None => { + return false; + } + } +} + +#[no_mangle] +pub unsafe extern "C" fn DecodeTunnelsMapIterEnd(iter: *mut c_void) { + let iter = cast_pointer!( + iter, + std::collections::hash_map::Iter<'static, flowtunnel_keys, u16> + ); + let _ = Box::from_raw(iter); +} diff --git a/src/decode.c b/src/decode.c index 99852bac5f6c..3674d8e9ca59 100644 --- a/src/decode.c +++ b/src/decode.c @@ -398,6 +398,11 @@ void PacketGetTunnelId(Packet *p, uint32_t session) p->tunnel_id = DecodeTunnelsId(decode_tunnels_map, k); } +void *DecodeTunnelsGetMapIter(void) +{ + return DecodeTunnelsIterStart(decode_tunnels_map); +} + /** * \brief Setup a pseudo packet (tunnel) * @@ -436,7 +441,10 @@ Packet *PacketTunnelPktSetup(ThreadVars *tv, DecodeThreadVars *dtv, Packet *pare p->datalink = DLT_RAW; p->tenant_id = parent->tenant_id; p->livedev = parent->livedev; - +#ifdef HAVE_PACKET_EBPF + // need to copy BypassPacketsFlow callback and such + AFPReadCopyBypass(p, parent); +#endif /* set the root ptr to the lowest layer */ if (parent->root != NULL) { p->root = parent->root; diff --git a/src/decode.h 
b/src/decode.h index 15530f32d456..c58a18389629 100644 --- a/src/decode.h +++ b/src/decode.h @@ -1143,6 +1143,8 @@ void DecodeUpdatePacketCounters(ThreadVars *tv, const DecodeThreadVars *dtv, const Packet *p); const char *PacketDropReasonToString(enum PacketDropReason r); +void *DecodeTunnelsGetMapIter(void); + /* decoder functions */ int DecodeEthernet(ThreadVars *, DecodeThreadVars *, Packet *, const uint8_t *, uint32_t); int DecodeSll(ThreadVars *, DecodeThreadVars *, Packet *, const uint8_t *, uint32_t); diff --git a/src/runmode-af-packet.c b/src/runmode-af-packet.c index 940eece3a0e8..50641c04a59d 100644 --- a/src/runmode-af-packet.c +++ b/src/runmode-af-packet.c @@ -585,6 +585,7 @@ static void *ParseAFPConfig(const char *iface) /* It will just set CPU count to 0 */ EBPFBuildCPUSet(NULL, aconf->iface); } + EBPFLoadTunnels(aconf->iface, aconf->ebpf_t_config.cpus_count); } /* we have a peer and we use bypass so we can set up XDP iface redirect */ if (aconf->out_iface) { diff --git a/src/source-af-packet.c b/src/source-af-packet.c index 1439709e82fc..9f6859d64e79 100644 --- a/src/source-af-packet.c +++ b/src/source-af-packet.c @@ -60,6 +60,7 @@ #include "flow-storage.h" #include "util-validate.h" #include "action-globals.h" +#include "rust.h" #ifdef HAVE_AF_PACKET @@ -736,6 +737,16 @@ static inline int AFPSuriFailure(AFPThreadVars *ptv, union thdr h) SCReturnInt(AFP_SURI_FAILURE); } +#ifdef HAVE_PACKET_EBPF +void AFPReadCopyBypass(Packet *dst, Packet *src) +{ + dst->BypassPacketsFlow = src->BypassPacketsFlow; + dst->afp_v.v4_map_fd = src->afp_v.v4_map_fd; + dst->afp_v.v6_map_fd = src->afp_v.v6_map_fd; + dst->afp_v.nr_cpus = src->afp_v.nr_cpus; +} +#endif + static inline void AFPReadApplyBypass(const AFPThreadVars *ptv, Packet *p) { #ifdef HAVE_PACKET_EBPF @@ -2232,11 +2243,20 @@ static int AFPSetFlowStorage(Packet *p, int map_fd, void *key0, void* key1, return 1; } +// unsupported by ebpf : tunnel with multiple layers of vlans inside #define FlowKeyIpFill(k, p) \ 
{ \ (k)->ip_proto = ((p)->proto == IPPROTO_TCP) ? 1 : 0; \ (k)->vlan0 = (p)->vlan_id[0]; \ - (k)->vlan1 = (p)->vlan_id[1]; \ + if ((p)->tunnel_id) { \ + if ((p)->vlan_id[1]) { \ + return 0; \ + } \ + (k)->tunnel = 1; \ + (k)->vlan1_or_tunnel_id = (p)->tunnel_id; \ + } else { \ + (k)->vlan1_or_tunnel_id = (p)->vlan_id[1]; \ + } \ } /** @@ -2267,10 +2287,9 @@ static int AFPBypassCallback(Packet *p) if (p->flow == NULL) { return 0; } - /* Bypassing tunneled packets is currently not supported - * because we can't discard the inner packet only due to - * primitive parsing in eBPF */ - if (PacketIsTunnel(p)) { + /* Bypassing tunneled packets is now supported based on the + * configured tunnel with their ids */ + if (p->tunnel_id == PKT_TUNNEL_UNKNOWN) { return 0; } if (PacketIsIPv4(p)) { @@ -2397,10 +2416,9 @@ static int AFPXDPBypassCallback(Packet *p) if (p->flow == NULL) { return 0; } - /* Bypassing tunneled packets is currently not supported - * because we can't discard the inner packet only due to - * primitive parsing in eBPF */ - if (PacketIsTunnel(p)) { + /* Bypassing tunneled packets is now supported based on the + * configured tunnel with their ids */ + if (p->tunnel_id == PKT_TUNNEL_UNKNOWN) { return 0; } if (PacketIsIPv4(p)) { diff --git a/src/source-af-packet.h b/src/source-af-packet.h index 29bedcb41808..9913ff922982 100644 --- a/src/source-af-packet.h +++ b/src/source-af-packet.h @@ -203,4 +203,9 @@ int AFPGetLinkType(const char *ifname); int AFPIsFanoutSupported(uint16_t cluster_id); +#ifdef HAVE_PACKET_EBPF +typedef struct Packet_ Packet; +void AFPReadCopyBypass(Packet *dst, Packet *src); +#endif + #endif /* SURICATA_SOURCE_AFP_H */ diff --git a/src/util-ebpf.c b/src/util-ebpf.c index 1d2d44681f5f..160241db32d2 100644 --- a/src/util-ebpf.c +++ b/src/util-ebpf.c @@ -55,6 +55,7 @@ #include #include #include "autoconf.h" +#include "rust.h" #define BPF_MAP_MAX_COUNT 16 @@ -181,7 +182,7 @@ static int EBPFLoadPinnedMapsFile(LiveDevice *livedev, const char 
*file) static int EBPFLoadPinnedMaps(LiveDevice *livedev, struct ebpf_timeout_config *config) { - int fd_v4 = -1, fd_v6 = -1; + int fd_v4 = -1, fd_v6 = -1, fd_tunnel = -1; /* First try to load the eBPF check map and return if found */ if (config->pinned_maps_name) { @@ -205,6 +206,9 @@ static int EBPFLoadPinnedMaps(LiveDevice *livedev, struct ebpf_timeout_config *c SCLogWarning("Found a flow_table_v4 map but no flow_table_v6 map"); return fd_v6; } + + /* Get flow tunnel table if it exists */ + fd_tunnel = EBPFLoadPinnedMapsFile(livedev, "flow_table_tunnels"); } struct bpf_maps_info *bpf_map_data = SCCalloc(1, sizeof(*bpf_map_data)); @@ -225,6 +229,14 @@ static int EBPFLoadPinnedMaps(LiveDevice *livedev, struct ebpf_timeout_config *c goto alloc_error; } bpf_map_data->last = 2; + if (fd_tunnel > 0) { + bpf_map_data->array[2].fd = fd_tunnel; + bpf_map_data->array[2].name = SCStrdup("flow_table_tunnels"); + if (bpf_map_data->array[2].name == NULL) { + goto alloc_error; + } + bpf_map_data->last++; + } } else { bpf_map_data->last = 0; } @@ -425,6 +437,12 @@ int EBPFLoadFile(const char *iface, const char *path, const char * section, break; } } + if (strcmp(bpf_map__name(map), "flow_table_tunnels") == 0) { + if (bpf_map__key_size(map) != sizeof(struct flowtunnel_keys_ebpf)) { + SCLogError("Incompatible flow_table_tunnels"); + break; + } + } SCLogDebug("Got a map '%s' with fd '%d'", bpf_map__name(map), bpf_map__fd(map)); bpf_map_data->array[bpf_map_data->last].fd = bpf_map__fd(map); bpf_map_data->array[bpf_map_data->last].name = SCStrdup(bpf_map__name(map)); @@ -760,7 +778,11 @@ static int EBPFForEachFlowV4Table(ThreadVars *th_v, LiveDevice *dev, const char flow_key.dst.addr_data32[2] = 0; flow_key.dst.addr_data32[3] = 0; flow_key.vlan_id[0] = next_key.vlan0; - flow_key.vlan_id[1] = next_key.vlan1; + if (next_key.tunnel) { + flow_key.tunnel_id = next_key.vlan1_or_tunnel_id; + } else { + flow_key.vlan_id[1] = next_key.vlan1_or_tunnel_id; + } if (next_key.ip_proto == 1) { 
flow_key.proto = IPPROTO_TCP; } else { @@ -878,7 +900,11 @@ static int EBPFForEachFlowV6Table(ThreadVars *th_v, flow_key.dst.addr_data32[3] = ntohl(next_key.dst[3]); } flow_key.vlan_id[0] = next_key.vlan0; - flow_key.vlan_id[1] = next_key.vlan1; + if (next_key.tunnel) { + flow_key.tunnel_id = next_key.vlan1_or_tunnel_id; + } else { + flow_key.vlan_id[1] = next_key.vlan1_or_tunnel_id; + } if (next_key.ip_proto == 1) { flow_key.proto = IPPROTO_TCP; } else { @@ -976,6 +1002,40 @@ static void EBPFRedirectMapAddCPU(int i, void *data) } } +/** \brief Copy every entry of the decoder tunnels map into iface's flow_table_tunnels eBPF map */ +void EBPFLoadTunnels(const char *iface, unsigned int nr_cpus) +{ + BPF_DECLARE_PERCPU(struct flowtunnel_id, value, nr_cpus); + + int mapfd = EBPFGetMapFDByName(iface, "flow_table_tunnels"); + if (mapfd < 0) { + return; + } + struct flowtunnel_keys key; + struct flowtunnel_keys_ebpf *key_ebpf = SCCalloc(1, sizeof(struct flowtunnel_keys_ebpf)); + if (key_ebpf == NULL) { + return; + } + uint16_t tunnel_id; + void *iter = DecodeTunnelsGetMapIter(); + + while (DecodeTunnelsMapIter(iter, &key, &tunnel_id)) { + for (unsigned int i = 0; i < nr_cpus; i++) { + BPF_PERCPU(value, i).tunnel_id = tunnel_id; + } + key_ebpf->src = key.src; + key_ebpf->dst = key.dst; + key_ebpf->session = key.session; + key_ebpf->tunnel_type = key.tunnel_type; + if (bpf_map_update_elem(mapfd, key_ebpf, value, BPF_NOEXIST) != 0) { + SCLogError("Can't update eBPF tunnels map: %s (%d)", strerror(errno), errno); + break; /* still release the iterator and the key below */ + } + } + DecodeTunnelsMapIterEnd(iter); + SCFree(key_ebpf); +} + void EBPFBuildCPUSet(SCConfNode *node, char *iface) { uint32_t key0 = 0; diff --git a/src/util-ebpf.h b/src/util-ebpf.h index 68c7b86959ed..788c73da445a 100644 --- a/src/util-ebpf.h +++ b/src/util-ebpf.h @@ -34,6 +34,12 @@ #define XDP_FLAGS_DRV_MODE (1U << 2) #define XDP_FLAGS_HW_MODE (1U << 3) +struct flowtunnel_keys_ebpf { + __be32 src; + __be32 dst; + __u32 session : 24; // vni or spanid + __u8 tunnel_type : 8; +}; struct flowv4_keys { __be32 src; @@ -44,7 +50,8 @@ struct flowv4_keys { }; __u8 ip_proto:1;
__u16 vlan0:15; - __u16 vlan1; + __u8 tunnel : 1; + __u16 vlan1_or_tunnel_id : 15; }; struct flowv6_keys { @@ -56,7 +63,8 @@ struct flowv6_keys { }; __u8 ip_proto:1; __u16 vlan0:15; - __u16 vlan1; + __u8 tunnel : 1; + __u16 vlan1_or_tunnel_id : 15; }; struct pair { @@ -64,6 +72,10 @@ struct pair { uint64_t bytes; }; +struct flowtunnel_id { + uint16_t tunnel_id; +}; + typedef struct EBPFBypassData_ { void *key[2]; int mapfd; @@ -88,6 +100,8 @@ void EBPFBuildCPUSet(SCConfNode *node, char *iface); int EBPFSetPeerIface(const char *iface, const char *out_iface); +void EBPFLoadTunnels(const char *iface, unsigned int nr_cpus); + int EBPFUpdateFlow(Flow *f, Packet *p, void *data); bool EBPFBypassUpdate(Flow *f, void *data, time_t tsec); void EBPFBypassFree(void *data); From 9c89f8a1d7f6535f7c4c05bdd4db37afd5555476 Mon Sep 17 00:00:00 2001 From: Philippe Antoine Date: Fri, 23 May 2025 14:36:49 +0200 Subject: [PATCH 09/11] xdp: handle vxlan tunnels Ticket: 7674 --- ebpf/xdp_filter.c | 92 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) diff --git a/ebpf/xdp_filter.c b/ebpf/xdp_filter.c index 99f221537131..f2fc241c26e9 100644 --- a/ebpf/xdp_filter.c +++ b/ebpf/xdp_filter.c @@ -65,6 +65,9 @@ * also be used as workaround of some hardware offload issue */ #define VLAN_TRACKING 1 +/* vxlan port configurable */ +#define VXLAN_PORT 4789 + struct vlan_hdr { __u16 h_vlan_TCI; __u16 h_vlan_encapsulated_proto; @@ -623,6 +626,92 @@ static int __always_inline filter_gre( return XDP_PASS; } +struct vxlanhdr { + __be16 flags; + __be16 gdp; + __u8 vni0; + __u8 vni1; + __u8 vni2; + __u8 res; +}; + +static int __always_inline filter_vxlan( + struct xdp_md *ctx, void *data, __u64 nh_off, void *data_end, struct iphdr *iph) +{ + __u16 vlan0 = 0; + __u16 vlan1; + __u16 h_proto; + struct flowtunnel_keys tuple; + struct flowtunnel_id *value; + + struct vxlanhdr *vh = (struct vxlanhdr *)(data + nh_off); + + tuple.tunnel = 6; // DECODE_TUNNEL_VXLAN + tuple.src = 
iph->saddr; + tuple.dst = iph->daddr; + tuple.session = vh->vni2 | (vh->vni1 << 8) | (vh->vni0 << 16); + value = bpf_map_lookup_elem(&flow_table_tunnels, &tuple); + if (!value) { + // unknown tunnel + return XDP_PASS; + } + vlan1 = 0x8000 | value->tunnel_id; + nh_off += sizeof(*vh); + + struct ethhdr *eth = data + nh_off; + nh_off += sizeof(*eth); + h_proto = eth->h_proto; + + if (h_proto == __constant_htons(ETH_P_8021Q) || h_proto == __constant_htons(ETH_P_8021AD)) { + struct vlan_hdr *vhdr; + + if (data + nh_off + sizeof(struct vlan_hdr) > data_end) + return XDP_PASS; + vhdr = data + nh_off; + nh_off += sizeof(struct vlan_hdr); + h_proto = vhdr->h_vlan_encapsulated_proto; +#if VLAN_TRACKING + vlan0 = vhdr->h_vlan_TCI & 0x0fff; +#endif + } + + if (h_proto == __constant_htons(ETH_P_IP)) + return filter_ipv4_final(ctx, data, nh_off, data_end, vlan0, vlan1); + else if (h_proto == __constant_htons(ETH_P_IPV6)) + return filter_ipv6(ctx, data, nh_off, data_end, vlan0, vlan1); + return XDP_PASS; +} + +static int __always_inline is_vxlan(void *data, __u64 nh_off, void *data_end) +{ + if (data + nh_off + sizeof(struct iphdr) + sizeof(struct udphdr) + sizeof(struct vxlanhdr) + + sizeof(struct ethhdr) > + data_end) { + return 0; + } + struct udphdr *uh = (struct udphdr *)(data + nh_off + sizeof(struct iphdr)); + if (uh->dest != __constant_htons(VXLAN_PORT)) { + return 0; + } + struct vxlanhdr *vh = + (struct vxlanhdr *)(data + nh_off + sizeof(struct iphdr) + sizeof(struct udphdr)); + // bail out unless vni is present and reserved is 0 + if ((vh->flags & 0xDEFF) != 8 || vh->res != 0) { + return 0; + } + // check ethernet type is handled + struct ethhdr *eth = (struct ethhdr *)(data + nh_off + sizeof(struct iphdr) + + sizeof(struct udphdr) + sizeof(struct vxlanhdr)); + if (eth->h_proto == __constant_htons(ETH_P_8021Q) || + eth->h_proto == __constant_htons(ETH_P_8021AD) || + eth->h_proto == __constant_htons(ETH_P_IP) || + eth->h_proto == __constant_htons(ETH_P_IPV6)) { + return 1; + } +
+ return 0; +} + static int __always_inline filter_ipv4( struct xdp_md *ctx, void *data, __u64 nh_off, void *data_end, __u16 vlan0, __u16 vlan1) { @@ -633,6 +722,9 @@ static int __always_inline filter_ipv4( if (iph->protocol == IPPROTO_GRE) { nh_off += sizeof(struct iphdr); return filter_gre(ctx, data, nh_off, data_end, iph); + } else if (iph->protocol == IPPROTO_UDP && is_vxlan(data, nh_off, data_end)) { + nh_off += sizeof(struct iphdr) + sizeof(struct udphdr); + return filter_vxlan(ctx, data, nh_off, data_end, iph); } return filter_ipv4_final(ctx, data, nh_off, data_end, vlan0, vlan1); } From c039a176c0fd5b1d8a87a131e4a80e968c31cfcc Mon Sep 17 00:00:00 2001 From: Philippe Antoine Date: Fri, 23 May 2025 16:18:16 +0200 Subject: [PATCH 10/11] detect: tunnel_id can be a selector for multi-tenants Ticket: 7674 --- src/detect-engine.c | 95 +++++++++++++++++++++++++++++++++++++++++++++ src/detect-engine.h | 1 + src/detect.h | 12 +++--- 3 files changed, 102 insertions(+), 6 deletions(-) diff --git a/src/detect-engine.c b/src/detect-engine.c index 41566c0ee9b8..85b9ef4e0f18 100644 --- a/src/detect-engine.c +++ b/src/detect-engine.c @@ -102,6 +102,7 @@ static DetectEngineMasterCtx g_master_de_ctx = { SCMUTEX_INITIALIZER, static uint32_t TenantIdHash(HashTable *h, void *data, uint16_t data_len); static char TenantIdCompare(void *d1, uint16_t d1_len, void *d2, uint16_t d2_len); static void TenantIdFree(void *d); +static uint32_t DetectEngineTenantGetIdFromTunnel(const void *ctx, const Packet *p); static uint32_t DetectEngineTenantGetIdFromLivedev(const void *ctx, const Packet *p); static uint32_t DetectEngineTenantGetIdFromVlanId(const void *ctx, const Packet *p); static uint32_t DetectEngineTenantGetIdFromPcap(const void *ctx, const Packet *p); @@ -3251,6 +3252,10 @@ static TmEcode DetectEngineThreadCtxInitForMT(ThreadVars *tv, DetectEngineThread det_ctx->TenantGetId = DetectEngineTenantGetIdFromLivedev; SCLogDebug("TENANT_SELECTOR_LIVEDEV"); break; + case
TENANT_SELECTOR_TUNNEL: + det_ctx->TenantGetId = DetectEngineTenantGetIdFromTunnel; + SCLogDebug("TENANT_SELECTOR_TUNNEL"); + break; case TENANT_SELECTOR_DIRECT: det_ctx->TenantGetId = DetectEngineTenantGetIdFromPcap; SCLogDebug("TENANT_SELECTOR_DIRECT"); @@ -4172,6 +4177,57 @@ int DetectEngineReloadTenantsBlocking(const int reload_cnt) return 0; } +static int DetectEngineMultiTenantSetupLoadTunnelMappings( + const SCConfNode *mappings_root_node, bool failure_fatal) +{ + SCConfNode *mapping_node = NULL; + + int mapping_cnt = 0; + if (mappings_root_node != NULL) { + TAILQ_FOREACH (mapping_node, &mappings_root_node->head, next) { + SCConfNode *tenant_id_node = SCConfNodeLookupChild(mapping_node, "tenant-id"); + if (tenant_id_node == NULL) + goto bad_mapping; + SCConfNode *tunnel_id_node = SCConfNodeLookupChild(mapping_node, "tunnel-id"); + if (tunnel_id_node == NULL) + goto bad_mapping; + + uint32_t tenant_id = 0; + if (StringParseUint32(&tenant_id, 10, (uint16_t)strlen(tenant_id_node->val), + tenant_id_node->val) < 0) { + SCLogError("tenant-id " + "of %s is invalid", + tenant_id_node->val); + goto bad_mapping; + } + + uint16_t tunnel_id = 0; + if (StringParseUint16(&tunnel_id, 10, (uint16_t)strlen(tunnel_id_node->val), + tunnel_id_node->val) < 0) { + SCLogError("tunnel id " + "of %s is invalid", + tunnel_id_node->val); + goto bad_mapping; + } + + if (DetectEngineTenantRegisterTunnel(tenant_id, tunnel_id) != 0) { + goto error; + } + SCLogConfig("tunnel %u connected to tenant-id %u", tunnel_id, tenant_id); + mapping_cnt++; + continue; + + bad_mapping: + if (failure_fatal) + goto error; + } + } + return mapping_cnt; + +error: + return 0; +} + static int DetectEngineMultiTenantSetupLoadLivedevMappings( const SCConfNode *mappings_root_node, bool failure_fatal) { @@ -4330,6 +4386,8 @@ int DetectEngineMultiTenantSetup(const bool unix_socket) } else if (strcmp(handler, "direct") == 0) { tenant_selector = master->tenant_selector = TENANT_SELECTOR_DIRECT; + } else if 
(strcmp(handler, "tunnel") == 0) { + tenant_selector = master->tenant_selector = TENANT_SELECTOR_TUNNEL; } else if (strcmp(handler, "device") == 0) { tenant_selector = master->tenant_selector = TENANT_SELECTOR_LIVEDEV; if (EngineModeIsIPS()) { @@ -4372,6 +4430,17 @@ int DetectEngineMultiTenantSetup(const bool unix_socket) } } } + } else if (tenant_selector == TENANT_SELECTOR_TUNNEL) { + int mapping_cnt = DetectEngineMultiTenantSetupLoadTunnelMappings( + mappings_root_node, failure_fatal); + if (mapping_cnt == 0) { + if (failure_fatal) { + SCLogError("no multi-detect mappings defined"); + goto error; + } else { + SCLogWarning("no multi-detect mappings defined"); + } + } } else if (tenant_selector == TENANT_SELECTOR_LIVEDEV) { int mapping_cnt = DetectEngineMultiTenantSetupLoadLivedevMappings(mappings_root_node, failure_fatal); @@ -4498,6 +4567,26 @@ static uint32_t DetectEngineTenantGetIdFromLivedev(const void *ctx, const Packet return ld->tenant_id; } +static uint32_t DetectEngineTenantGetIdFromTunnel(const void *ctx, const Packet *p) +{ + const DetectEngineThreadCtx *det_ctx = ctx; + + if (p->tunnel_id == 0 || p->tunnel_id == PKT_TUNNEL_UNKNOWN) + return 0; + + if (det_ctx == NULL || det_ctx->tenant_array == NULL || det_ctx->tenant_array_size == 0) + return 0; + + /* not very efficient, but for now we're targeting only limited amounts. + * Can use hash/tree approach later. 
*/ + for (uint32_t x = 0; x < det_ctx->tenant_array_size; x++) { + if (det_ctx->tenant_array[x].traffic_id == p->tunnel_id) + return det_ctx->tenant_array[x].tenant_id; + } + + return 0; +} + static int DetectEngineTenantRegisterSelector( enum DetectEngineTenantSelectors selector, uint32_t tenant_id, uint32_t traffic_id) { @@ -4581,6 +4670,12 @@ int DetectEngineTenantRegisterLivedev(uint32_t tenant_id, int device_id) TENANT_SELECTOR_LIVEDEV, tenant_id, (uint32_t)device_id); } +int DetectEngineTenantRegisterTunnel(uint32_t tenant_id, uint16_t tunnel_id) +{ + return DetectEngineTenantRegisterSelector( + TENANT_SELECTOR_TUNNEL, tenant_id, (uint32_t)tunnel_id); +} + int DetectEngineTenantRegisterVlanId(uint32_t tenant_id, uint16_t vlan_id) { return DetectEngineTenantRegisterSelector(TENANT_SELECTOR_VLAN, tenant_id, (uint32_t)vlan_id); diff --git a/src/detect-engine.h b/src/detect-engine.h index c778a9d412e5..d52187fcd241 100644 --- a/src/detect-engine.h +++ b/src/detect-engine.h @@ -121,6 +121,7 @@ int DetectEngineReloadTenantBlocking(uint32_t tenant_id, const char *yaml, int r int DetectEngineReloadTenantsBlocking(const int reload_cnt); int DetectEngineTenantRegisterLivedev(uint32_t tenant_id, int device_id); +int DetectEngineTenantRegisterTunnel(uint32_t tenant_id, uint16_t tunnel_id); int DetectEngineTenantRegisterVlanId(uint32_t tenant_id, uint16_t vlan_id); int DetectEngineTenantUnregisterVlanId(uint32_t tenant_id, uint16_t vlan_id); int DetectEngineTenantRegisterPcapFile(uint32_t tenant_id); diff --git a/src/detect.h b/src/detect.h index 6f092b533312..321319222937 100644 --- a/src/detect.h +++ b/src/detect.h @@ -1681,12 +1681,12 @@ typedef struct SigGroupHead_ { /** keyword supporting setting an optional direction */ #define SIGMATCH_SUPPORT_DIR BIT_U16(13) -enum DetectEngineTenantSelectors -{ - TENANT_SELECTOR_UNKNOWN = 0, /**< not set */ - TENANT_SELECTOR_DIRECT, /**< method provides direct tenant id */ - TENANT_SELECTOR_VLAN, /**< map vlan to tenant id */ - 
TENANT_SELECTOR_LIVEDEV, /**< map livedev to tenant id */ +enum DetectEngineTenantSelectors { + TENANT_SELECTOR_UNKNOWN = 0, /**< not set */ + TENANT_SELECTOR_DIRECT, /**< method provides direct tenant id */ + TENANT_SELECTOR_VLAN, /**< map vlan to tenant id */ + TENANT_SELECTOR_LIVEDEV, /**< map livedev to tenant id */ + TENANT_SELECTOR_TUNNEL, /**< map tunnel id to tenant id */ }; typedef struct DetectEngineTenantMapping_ { From 92be36f908be985107adff39ea3ecb114dac2e3b Mon Sep 17 00:00:00 2001 From: Philippe Antoine Date: Thu, 15 May 2025 14:15:47 +0200 Subject: [PATCH 11/11] flow: add config option to skip non-tunneled packets Ticket: 7674 On interfaces meant to receive only tunneled traffic --- doc/userguide/configuration/suricata-yaml.rst | 4 +++ src/decode.c | 33 +++++++++++++++++++ src/decode.h | 3 +- src/flow-worker.c | 3 ++ src/util-device-private.h | 1 + 5 files changed, 43 insertions(+), 1 deletion(-) diff --git a/doc/userguide/configuration/suricata-yaml.rst b/doc/userguide/configuration/suricata-yaml.rst index 0922dade725e..dd615a4cf7bc 100644 --- a/doc/userguide/configuration/suricata-yaml.rst +++ b/doc/userguide/configuration/suricata-yaml.rst @@ -3045,6 +3045,10 @@ This section is a list of tunnels with the following parameters: dst: 192.168.1.3 session: 123 # erspan span id or vxlan vni +It is also recommended to define the ``decoder.tunnel-ifaces`` list of +interfaces receiving tunneled traffic. Traffic received on these interfaces +that does not belong to a defined tunnel will be skipped.
+ Advanced Options ---------------- diff --git a/src/decode.c b/src/decode.c index 3674d8e9ca59..189edc80dbb0 100644 --- a/src/decode.c +++ b/src/decode.c @@ -71,6 +71,7 @@ #include "util-profiling.h" #include "util-validate.h" #include "util-debug.h" +#include "util-device-private.h" #include "util-exception-policy.h" #include "action-globals.h" @@ -224,6 +225,13 @@ void PacketFree(Packet *p) SCFree(p); } +static bool PacketIsInTunnelIface(Packet *p) +{ + if (p->livedev) { + return p->livedev->skip_non_tunnel; + } + return false; +} /** * \brief Finalize decoding of a packet * @@ -236,6 +244,10 @@ void PacketDecodeFinalize(ThreadVars *tv, DecodeThreadVars *dtv, Packet *p) if (p->flags & PKT_IS_INVALID) { StatsIncr(tv, dtv->counter_invalid); } + if (p->tunnel_id == 0 && PacketIsInTunnelIface(p)) { + // skips non-tunnel packets + p->flags |= PKT_SKIP_WORK; + } } void PacketUpdateEngineEventCounters(ThreadVars *tv, @@ -386,8 +398,12 @@ static void *decode_tunnels_map; void PacketGetTunnelId(Packet *p, uint32_t session) { + bool packet_in_tunnel_iface = PacketIsInTunnelIface(p); if (decode_tunnels_map == NULL || p->root == NULL || !PacketIsIPv4(p->root)) { p->tunnel_id = PKT_TUNNEL_UNKNOWN; + if (packet_in_tunnel_iface) { + p->flags |= PKT_SKIP_WORK; + } return; } struct flowtunnel_keys k = {}; @@ -396,6 +412,9 @@ void PacketGetTunnelId(Packet *p, uint32_t session) k.session = session; k.tunnel_type = (uint8_t)p->ttype; p->tunnel_id = DecodeTunnelsId(decode_tunnels_map, k); + if (packet_in_tunnel_iface && p->tunnel_id == PKT_TUNNEL_UNKNOWN) { + p->flags |= PKT_SKIP_WORK; + } } void *DecodeTunnelsGetMapIter(void) @@ -1081,6 +1100,20 @@ void DecodeGlobalConfig(void) DecodeVXLANConfig(); DecodeERSPANConfig(); decode_tunnels_map = DecodeTunnelsConfig(); + SCConfNode *tunnel_ifaces_node = SCConfGetNode("decoder.tunnel-ifaces"); + if (tunnel_ifaces_node != NULL) { + SCConfNode *child = NULL; + TAILQ_FOREACH (child, &tunnel_ifaces_node->head, next) { + if (child->val != 
NULL) { + LiveDevice *ld = LiveGetDevice(child->val); + if (ld != NULL) { + ld->skip_non_tunnel = true; + } else { + SCLogWarning("%s is not a registered device", child->val); + } + } + } + } intmax_t value = 0; if (SCConfGetInt("decoder.max-layers", &value) == 1) { if (value < 0 || value > UINT8_MAX) { diff --git a/src/decode.h b/src/decode.h index c58a18389629..95bdbe55eafc 100644 --- a/src/decode.h +++ b/src/decode.h @@ -1269,7 +1269,8 @@ void DecodeUnregisterCounters(void); /** Packet is part of established stream */ #define PKT_STREAM_EST BIT_U32(6) -// vacancy +/** Flag to indicate that the worker should skip the packet */ +#define PKT_SKIP_WORK BIT_U32(7) #define PKT_HAS_FLOW BIT_U32(8) /** Pseudo packet to end the stream */ diff --git a/src/flow-worker.c b/src/flow-worker.c index 0bd4ae278595..d11c0392fad9 100644 --- a/src/flow-worker.c +++ b/src/flow-worker.c @@ -564,6 +564,9 @@ static TmEcode FlowWorker(ThreadVars *tv, Packet *p, void *data) SCLogDebug("packet %"PRIu64, p->pcap_cnt); + if (p->flags & PKT_SKIP_WORK) { + return TM_ECODE_OK; + } if ((PKT_IS_FLUSHPKT(p))) { SCLogDebug("thread %s flushing", tv->printable_name); OutputLoggerFlush(tv, p, fw->output_thread); diff --git a/src/util-device-private.h b/src/util-device-private.h index 1e099d1e67b3..a0f035658d2f 100644 --- a/src/util-device-private.h +++ b/src/util-device-private.h @@ -34,6 +34,7 @@ typedef struct LiveDevice_ { char dev_short[MAX_DEVNAME + 1]; int mtu; /* MTU of the device */ bool tenant_id_set; + bool skip_non_tunnel; uint16_t id;