From 76727dac4b27884cb2d4ab06ce92f0f58afad253 Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Wed, 23 Jul 2025 21:14:26 +0200 Subject: [PATCH 01/10] detect: convert file_flags to a uint16_t A following commit will add a new value that would overflow the current size. Ticket: 7816 --- src/detect.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/detect.h b/src/detect.h index 5d06bd093788..afe98582392f 100644 --- a/src/detect.h +++ b/src/detect.h @@ -317,14 +317,14 @@ typedef struct DetectPort_ { // vacancy 1x #define SIG_MASK_REQUIRE_ENGINE_EVENT BIT_U8(7) -#define FILE_SIG_NEED_FILE 0x01 -#define FILE_SIG_NEED_FILENAME 0x02 -#define FILE_SIG_NEED_MAGIC 0x04 /**< need the start of the file */ -#define FILE_SIG_NEED_FILECONTENT 0x08 -#define FILE_SIG_NEED_MD5 0x10 -#define FILE_SIG_NEED_SHA1 0x20 -#define FILE_SIG_NEED_SHA256 0x40 -#define FILE_SIG_NEED_SIZE 0x80 +#define FILE_SIG_NEED_FILE BIT_U16(0) +#define FILE_SIG_NEED_FILENAME BIT_U16(1) +#define FILE_SIG_NEED_MAGIC BIT_U16(2) /**< need the start of the file */ +#define FILE_SIG_NEED_FILECONTENT BIT_U16(3) +#define FILE_SIG_NEED_MD5 BIT_U16(4) +#define FILE_SIG_NEED_SHA1 BIT_U16(5) +#define FILE_SIG_NEED_SHA256 BIT_U16(6) +#define FILE_SIG_NEED_SIZE BIT_U16(7) /* Detection Engine flags */ #define DE_QUIET 0x01 /**< DE is quiet (esp for unittests) */ @@ -681,7 +681,7 @@ typedef struct Signature_ { /** inline -- action */ uint8_t action; - uint8_t file_flags; + uint16_t file_flags; /** addresses, ports and proto this sig matches on */ DetectProto proto; From ce977d97a95962be155d3c4d8102e4304973aefd Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Wed, 23 Jul 2025 21:21:31 +0200 Subject: [PATCH 02/10] file: mime type as alternative to magic File magic is known to have performance issues. When looking for an alternative, I was pointed to the tree_magic_mini crate, which outputs the MIME type of a binary stream. 
This is different from magic, but it has multiple advantages: as it is a standard, it can be correlated with other tools. So instead of replacing magic, this patch adds a new MIME type output to fileinfo events. This patch also adds the `file.mimetype` sticky buffer. MIME type has two advantages over file magic: first, it is much faster, and second, the results are easier to use as MIME types are well defined. However, it provides less information than magic, for example with regard to the size of images. Ticket: 7816 --- .github/workflows/rust.yml | 2 +- etc/schema.json | 8 ++ rust/Cargo.lock.in | 43 ++++++++ rust/Cargo.toml.in | 2 + rust/src/filemimetype.rs | 26 +++++ rust/src/lib.rs | 1 + src/Makefile.am | 4 + src/app-layer-smtp.c | 3 +- src/detect-engine-build.c | 19 ++++ src/detect-engine-build.h | 1 + src/detect-engine-file.c | 6 + src/detect-engine-register.c | 2 + src/detect-engine-register.h | 1 + src/detect-engine-siggroup.c | 3 + src/detect-file-mimetype.c | 206 +++++++++++++++++++++++++++++++++++ src/detect-file-mimetype.h | 31 ++++++ src/detect.c | 4 + src/detect.h | 2 + src/flow.c | 1 + src/flow.h | 22 ++-- src/output-file.c | 5 + src/output-filedata.c | 5 + src/output-filestore.c | 6 + src/output-json-alert.c | 1 + src/output-json-file.c | 6 + src/output-json.c | 5 + src/util-file.c | 39 +++++++ src/util-file.h | 5 + src/util-mimetype.c | 51 +++++++++ src/util-mimetype.h | 30 +++++ suricata.yaml.in | 1 + 31 files changed, 527 insertions(+), 14 deletions(-) create mode 100644 rust/src/filemimetype.rs create mode 100644 src/detect-file-mimetype.c create mode 100644 src/detect-file-mimetype.h create mode 100644 src/util-mimetype.c create mode 100644 src/util-mimetype.h diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 134d23517e8e..cdb2677f5318 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -68,7 +68,7 @@ jobs: - run: git config --global --add safe.directory /__w/suricata/suricata - run: 
./scripts/bundle.sh - run: ./autogen.sh - - run: ./configure --enable-warnings + - run: ./configure --enable-warnings --enable-mimetype --enable-bundled-gpl-mimetype - name: Checking bindgen output working-directory: rust run: | diff --git a/etc/schema.json b/etc/schema.json index 97d56571950d..f15c828df0b6 100644 --- a/etc/schema.json +++ b/etc/schema.json @@ -1729,6 +1729,10 @@ "md5": { "type": "string" }, + "mimetype": { + "type": "string", + "description": "The MIME type of the file (e.g., application/pdf, image/png, etc.)" + }, "sha1": { "type": "string" }, @@ -1788,6 +1792,10 @@ "md5": { "type": "string" }, + "mimetype": { + "type": "string", + "description": "The MIME type of the file (e.g., application/pdf, image/png, etc.)" + }, "sha1": { "type": "string" }, diff --git a/rust/Cargo.lock.in b/rust/Cargo.lock.in index 933fbe72e502..b98128c07a7d 100644 --- a/rust/Cargo.lock.in +++ b/rust/Cargo.lock.in @@ -480,6 +480,12 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "fixedbitset" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" + [[package]] name = "flate2" version = "1.0.35" @@ -491,6 +497,12 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + [[package]] name = "foldhash" version = "0.1.5" @@ -1046,6 +1058,16 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" +[[package]] +name = "petgraph" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" +dependencies = [ + "fixedbitset", + "indexmap", +] + [[package]] name = "phf" version = 
"0.10.1" @@ -1552,6 +1574,7 @@ dependencies = [ "test-case", "time", "tls-parser", + "tree_magic_mini", "uuid", "widestring", "x509-parser", @@ -1861,6 +1884,26 @@ dependencies = [ "tracing-log", ] +[[package]] +name = "tree_magic_db" +version = "3.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd30f22e7532ed0d3d846e24841a132c8dcb779f5b497bda82d904aa04755375" + +[[package]] +name = "tree_magic_mini" +version = "3.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aac5e8971f245c3389a5a76e648bfc80803ae066a1243a75db0064d7c1129d63" +dependencies = [ + "fnv", + "memchr", + "nom", + "once_cell", + "petgraph", + "tree_magic_db", +] + [[package]] name = "typenum" version = "1.18.0" diff --git a/rust/Cargo.toml.in b/rust/Cargo.toml.in index 4d656df7f6b9..6ff8bc7f464c 100644 --- a/rust/Cargo.toml.in +++ b/rust/Cargo.toml.in @@ -58,6 +58,8 @@ lru = "~0.12.5" der-parser = { version = "~9.0.0", default-features = false } kerberos-parser = { version = "~0.8.0", default-features = false } +tree_magic_mini = "~3.1.6" + sawp-modbus = "~0.13.1" sawp-pop3 = "~0.13.1" sawp = "~0.13.1" diff --git a/rust/src/filemimetype.rs b/rust/src/filemimetype.rs new file mode 100644 index 000000000000..29cca7e00f29 --- /dev/null +++ b/rust/src/filemimetype.rs @@ -0,0 +1,26 @@ +/* Copyright (C) 2025 Open Information Security Foundation + * + * You can copy, redistribute or modify this Program under the terms of + * the GNU General Public License version 2 as published by the Free + * Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +use crate::common::rust_string_to_c; +use std::os::raw::c_char; + +#[no_mangle] +pub unsafe extern "C" fn SCGetMimetype(input: *const u8, len: u32) -> * mut c_char { + let slice: &[u8] = std::slice::from_raw_parts(input as *mut u8, len as usize); + let result = tree_magic_mini::from_u8(slice); + rust_string_to_c(result.to_string()) +} diff --git a/rust/src/lib.rs b/rust/src/lib.rs index bb7fe6e9fdf6..4fe26408396e 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -90,6 +90,7 @@ pub mod applayer; pub mod frames; pub mod filecontainer; pub mod filetracker; +pub mod filemimetype; pub mod kerberos; pub mod detect; pub mod utils; diff --git a/src/Makefile.am b/src/Makefile.am index 5a83ab290d1a..78408afbf0f9 100755 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -160,6 +160,7 @@ noinst_HEADERS = \ detect-fast-pattern.h \ detect-file-data.h \ detect-file-hash-common.h \ + detect-file-mimetype.h \ detect-filemagic.h \ detect-filemd5.h \ detect-filename.h \ @@ -565,6 +566,7 @@ noinst_HEADERS = \ util-memcmp.h \ util-memcpy.h \ util-memrchr.h \ + util-mimetype.h \ util-misc.h \ util-mpm-ac-ks.h \ util-mpm-ac-queue.h \ @@ -761,6 +763,7 @@ libsuricata_c_a_SOURCES = \ detect-fast-pattern.c \ detect-file-data.c \ detect-file-hash-common.c \ + detect-file-mimetype.c \ detect-filemagic.c \ detect-filemd5.c \ detect-filename.c \ @@ -1147,6 +1150,7 @@ libsuricata_c_a_SOURCES = \ util-mem.c \ util-memcmp.c \ util-memrchr.c \ + util-mimetype.c \ util-misc.c \ util-mpm-ac-ks-small.c \ util-mpm-ac-ks.c \ diff --git a/src/app-layer-smtp.c b/src/app-layer-smtp.c index a7b76ec44290..59806a9eb942 100644 --- a/src/app-layer-smtp.c +++ b/src/app-layer-smtp.c @@ -36,6 +36,7 @@ #include "app-layer-smtp.h" #include "util-enum.h" +#include "util-file.h" 
#include "util-mpm.h" #include "util-debug.h" #include "util-byte.h" @@ -1193,7 +1194,7 @@ static int SMTPProcessRequest( } else if (smtp_config.raw_extraction) { if (FileOpenFileWithId(&tx->files_ts, &smtp_config.sbcfg, state->file_track_id++, (uint8_t *)rawmsgname, strlen(rawmsgname), NULL, 0, - FILE_NOMD5 | FILE_NOMAGIC) == 0) { + FILE_NOMD5 | FILE_NOMAGIC | FILE_NOMIMETYPE) == 0) { SMTPNewFile(tx, tx->files_ts.tail); } } else if (smtp_config.decode_mime) { diff --git a/src/detect-engine-build.c b/src/detect-engine-build.c index 4d3aac45385c..8fab5e009b0e 100644 --- a/src/detect-engine-build.c +++ b/src/detect-engine-build.c @@ -128,6 +128,25 @@ int SignatureIsFilemagicInspecting(const Signature *s) return 0; } +/** + * \brief Check if a signature contains the file.mimetype keyword. + * + * \param s signature + * + * \retval 0 no + * \retval 1 yes + */ +int SignatureIsFileMimetypeInspecting(const Signature *s) +{ + if (s == NULL) + return 0; + + if (s->file_flags & FILE_SIG_NEED_MIMETYPE) + return 1; + + return 0; +} + /** * \brief Check if a signature contains the filemd5 keyword. 
* diff --git a/src/detect-engine-build.h b/src/detect-engine-build.h index a61a930f9a05..149aa1369c2e 100644 --- a/src/detect-engine-build.h +++ b/src/detect-engine-build.h @@ -23,6 +23,7 @@ void PacketCreateMask(Packet *p, SignatureMask *mask, AppProto alproto, int SignatureIsFilestoring(const Signature *); int SignatureIsFilemagicInspecting(const Signature *); +int SignatureIsFileMimetypeInspecting(const Signature *); int SignatureIsFileMd5Inspecting(const Signature *); int SignatureIsFileSha1Inspecting(const Signature *s); int SignatureIsFileSha256Inspecting(const Signature *s); diff --git a/src/detect-engine-file.c b/src/detect-engine-file.c index 26601ce8a96b..ce2cd47a9293 100644 --- a/src/detect-engine-file.c +++ b/src/detect-engine-file.c @@ -92,6 +92,12 @@ static uint8_t DetectFileInspect(DetectEngineThreadCtx *det_ctx, Flow *f, const continue; } + if ((s->file_flags & FILE_SIG_NEED_MIMETYPE) && file_size == 0) { + SCLogDebug("sig needs file content, but we don't have any"); + r = DETECT_ENGINE_INSPECT_SIG_NO_MATCH; + continue; + } + if ((s->file_flags & FILE_SIG_NEED_FILECONTENT) && file_size == 0) { SCLogDebug("sig needs file content, but we don't have any"); r = DETECT_ENGINE_INSPECT_SIG_NO_MATCH; diff --git a/src/detect-engine-register.c b/src/detect-engine-register.c index 81da9fa1ee0f..674d6451a7ce 100644 --- a/src/detect-engine-register.c +++ b/src/detect-engine-register.c @@ -137,6 +137,7 @@ #include "detect-filesha1.h" #include "detect-filesha256.h" #include "detect-filesize.h" +#include "detect-file-mimetype.h" #include "detect-dataset.h" #include "detect-datarep.h" #include "detect-dsize.h" @@ -565,6 +566,7 @@ void SigTableSetup(void) DetectFileSha1Register(); DetectFileSha256Register(); DetectFilesizeRegister(); + DetectFileMimetypeRegister(); DetectHttpUARegister(); DetectHttpHHRegister(); diff --git a/src/detect-engine-register.h b/src/detect-engine-register.h index 0e5e52242c76..3edaa3b10de5 100644 --- a/src/detect-engine-register.h +++ 
b/src/detect-engine-register.h @@ -235,6 +235,7 @@ enum DetectKeywordId { DETECT_FILESTORE_POSTMATCH, DETECT_FILEMAGIC, DETECT_FILE_MAGIC, + DETECT_FILE_MIMETYPE, DETECT_FILEMD5, DETECT_FILESHA1, DETECT_FILESHA256, diff --git a/src/detect-engine-siggroup.c b/src/detect-engine-siggroup.c index 2163010e58c4..6af47b8dddd8 100644 --- a/src/detect-engine-siggroup.c +++ b/src/detect-engine-siggroup.c @@ -600,6 +600,9 @@ void SigGroupHeadSetupFiles(const DetectEngineCtx *de_ctx, SigGroupHead *sgh) sgh->flags |= SIG_GROUP_HEAD_HAVEFILEMAGIC; } #endif + if (SignatureIsFileMimetypeInspecting(s)) { + sgh->flags |= SIG_GROUP_HEAD_HAVEFILEMIMETYPE; + } if (SignatureIsFilestoring(s)) { // should be insured by caller that we do not overflow DEBUG_VALIDATE_BUG_ON(sgh->filestore_cnt == UINT16_MAX); diff --git a/src/detect-file-mimetype.c b/src/detect-file-mimetype.c new file mode 100644 index 000000000000..2a371dd24076 --- /dev/null +++ b/src/detect-file-mimetype.c @@ -0,0 +1,206 @@ +/* Copyright (C) 2025 Open Information Security Foundation + * + * You can copy, redistribute or modify this Program under the terms of + * the GNU General Public License version 2 as published by the Free + * Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. 
+ */ + +/** + * \file + * + * \author Eric Leblond + * + */ + +#include "suricata-common.h" +#include "detect-file-mimetype.h" +#include "detect-engine.h" +#include "detect-engine-buffer.h" +#include "detect-engine-mpm.h" +#include "detect-engine-prefilter.h" +#include "detect-engine-content-inspection.h" +#include "detect-parse.h" +#include "detect-file-data.h" +#include "rust.h" +#include "util-mimetype.h" +#include "util-profiling.h" +#include "app-layer-parser.h" + +static int g_file_match_list_id = 0; + +static int DetectFileMimetypeSetup(DetectEngineCtx *de_ctx, Signature *s, const char *str); +static int g_file_mimetype_buffer_id = 0; + +static int PrefilterMpmFileMimetypeRegister(DetectEngineCtx *de_ctx, SigGroupHead *sgh, + MpmCtx *mpm_ctx, const DetectBufferMpmRegistry *mpm_reg, int list_id); +static unsigned char DetectEngineInspectFileMimetype(DetectEngineCtx *de_ctx, + DetectEngineThreadCtx *det_ctx, const DetectEngineAppInspectionEngine *engine, + const Signature *s, Flow *f, uint8_t flags, void *alstate, void *txv, uint64_t tx_id); + +void DetectFileMimetypeRegister(void) +{ + sigmatch_table[DETECT_FILE_MIMETYPE].name = "file.mime_type"; + sigmatch_table[DETECT_FILE_MIMETYPE].desc = "sticky buffer to match on file mime type"; + sigmatch_table[DETECT_FILE_MIMETYPE].url = "/rules/file-keywords.html#file_mimetype"; + sigmatch_table[DETECT_FILE_MIMETYPE].Setup = DetectFileMimetypeSetup; + sigmatch_table[DETECT_FILE_MIMETYPE].flags = SIGMATCH_NOOPT | SIGMATCH_INFO_STICKY_BUFFER; + + filehandler_table[DETECT_FILE_MIMETYPE].name = "file.mime_type", + filehandler_table[DETECT_FILE_MIMETYPE].priority = 2; + filehandler_table[DETECT_FILE_MIMETYPE].PrefilterFn = PrefilterMpmFileMimetypeRegister; + filehandler_table[DETECT_FILE_MIMETYPE].Callback = DetectEngineInspectFileMimetype; + + g_file_match_list_id = DetectBufferTypeRegister("files"); + + DetectBufferTypeSetDescriptionByName("file.mime_type", "file mime_type"); + 
DetectBufferTypeSupportsMultiInstance("file.mime_type"); + + g_file_mimetype_buffer_id = DetectBufferTypeGetByName("file.mime_type"); + + SCLogDebug("registering file mime type rule option"); +} + +static int DetectFileMimetypeSetup(DetectEngineCtx *de_ctx, Signature *s, const char *str) +{ + if (SCDetectBufferSetActiveList(de_ctx, s, g_file_mimetype_buffer_id) < 0) + return -1; + s->file_flags |= (FILE_SIG_NEED_FILE | FILE_SIG_NEED_MIMETYPE); + return 0; +} + +static InspectionBuffer *FileMimetypeGetDataCallback(DetectEngineThreadCtx *det_ctx, + const DetectEngineTransforms *transforms, Flow *f, uint8_t flow_flags, File *cur_file, + int list_id, int local_file_id, bool first) +{ + SCEnter(); + + InspectionBuffer *buffer = InspectionBufferMultipleForListGet(det_ctx, list_id, local_file_id); + if (buffer == NULL) + return NULL; + if (!first && buffer->inspect != NULL) + return buffer; + + if (cur_file->mimetype == NULL) + FileMimetypeLookup(cur_file); + if (cur_file->mimetype == NULL) + return NULL; + + const uint8_t *data = (uint8_t *)cur_file->mimetype; + uint32_t data_len = (uint32_t)strlen(cur_file->mimetype); + + InspectionBufferSetupMulti(det_ctx, buffer, transforms, data, data_len); + + SCReturnPtr(buffer, "InspectionBuffer"); +} + +typedef struct PrefilterMpmFileMimetype { + int list_id; + const MpmCtx *mpm_ctx; + const DetectEngineTransforms *transforms; +} PrefilterMpmFileMimetype; + +/** \brief Filemimetype Filemimetype Mpm prefilter callback + * + * \param det_ctx detection engine thread ctx + * \param pectx inspection context + * \param p packet to inspect + * \param f flow to inspect + * \param txv tx to inspect + * \param idx transaction id + * \param flags STREAM_* flags including direction + */ +static void PrefilterTxFileMimetype(DetectEngineThreadCtx *det_ctx, const void *pectx, Packet *p, + Flow *f, void *txv, const uint64_t idx, const AppLayerTxData *txd, const uint8_t flags) +{ + SCEnter(); + + const PrefilterMpmFileMimetype *ctx = (const 
PrefilterMpmFileMimetype *)pectx; + const MpmCtx *mpm_ctx = ctx->mpm_ctx; + const int list_id = ctx->list_id; + + AppLayerGetFileState files = AppLayerParserGetTxFiles(f, txv, flags); + FileContainer *ffc = files.fc; + if (ffc != NULL) { + int local_file_id = 0; + for (File *file = ffc->head; file != NULL; file = file->next) { + InspectionBuffer *buffer = FileMimetypeGetDataCallback( + det_ctx, ctx->transforms, f, flags, file, list_id, local_file_id, txv); + if (buffer == NULL) + continue; + + if (buffer->inspect_len >= mpm_ctx->minlen) { + (void)mpm_table[mpm_ctx->mpm_type].Search(mpm_ctx, &det_ctx->mtc, &det_ctx->pmq, + buffer->inspect, buffer->inspect_len); + PREFILTER_PROFILING_ADD_BYTES(det_ctx, buffer->inspect_len); + } + local_file_id++; + } + } +} + +static void PrefilterMpmFileMimetypeFree(void *ptr) +{ + SCFree(ptr); +} + +static int PrefilterMpmFileMimetypeRegister(DetectEngineCtx *de_ctx, SigGroupHead *sgh, + MpmCtx *mpm_ctx, const DetectBufferMpmRegistry *mpm_reg, int list_id) +{ + PrefilterMpmFileMimetype *pectx = SCCalloc(1, sizeof(*pectx)); + if (pectx == NULL) + return -1; + pectx->list_id = list_id; + pectx->mpm_ctx = mpm_ctx; + pectx->transforms = &mpm_reg->transforms; + + return PrefilterAppendTxEngine(de_ctx, sgh, PrefilterTxFileMimetype, mpm_reg->app_v2.alproto, + mpm_reg->app_v2.tx_min_progress, pectx, PrefilterMpmFileMimetypeFree, mpm_reg->pname); +} + +static unsigned char DetectEngineInspectFileMimetype(DetectEngineCtx *de_ctx, + DetectEngineThreadCtx *det_ctx, const DetectEngineAppInspectionEngine *engine, + const Signature *s, Flow *f, uint8_t flags, void *alstate, void *txv, uint64_t tx_id) +{ + const DetectEngineTransforms *transforms = NULL; + if (!engine->mpm) { + transforms = engine->v2.transforms; + } + + AppLayerGetFileState files = AppLayerParserGetTxFiles(f, txv, flags); + FileContainer *ffc = files.fc; + if (ffc == NULL) { + return DETECT_ENGINE_INSPECT_SIG_CANT_MATCH_FILES; + } + + uint8_t r = 
DETECT_ENGINE_INSPECT_SIG_NO_MATCH; + int local_file_id = 0; + for (File *file = ffc->head; file != NULL; file = file->next) { + InspectionBuffer *buffer = FileMimetypeGetDataCallback( + det_ctx, transforms, f, flags, file, engine->sm_list, local_file_id, txv); + if (buffer == NULL) { + local_file_id++; + continue; + } + + const bool match = DetectEngineContentInspection(de_ctx, det_ctx, s, engine->smd, NULL, f, + (uint8_t *)buffer->inspect, buffer->inspect_len, buffer->inspect_offset, + DETECT_CI_FLAGS_SINGLE, DETECT_ENGINE_CONTENT_INSPECTION_MODE_STATE); + if (match) { + return DETECT_ENGINE_INSPECT_SIG_MATCH; + } else { + r = DETECT_ENGINE_INSPECT_SIG_CANT_MATCH_FILES; + } + local_file_id++; + } + return r; +} diff --git a/src/detect-file-mimetype.h b/src/detect-file-mimetype.h new file mode 100644 index 000000000000..6badb38060be --- /dev/null +++ b/src/detect-file-mimetype.h @@ -0,0 +1,31 @@ +/* Copyright (C) 2025 Open Information Security Foundation + * + * You can copy, redistribute or modify this Program under the terms of + * the GNU General Public License version 2 as published by the Free + * Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. 
+ */ + +/** + * \file + * + * \author Eric Leblond + * + */ + +#ifndef SURICATA_DETECT_FILE_MIMETYPE_H +#define SURICATA_DETECT_FILE_MIMETYPE_H + +/* prototypes */ +void DetectFileMimetypeRegister(void); + +#endif /* SURICATA_DETECT_FILE_MIMETYPE_H */ diff --git a/src/detect.c b/src/detect.c index e6e297ca7378..8dd6b0851283 100644 --- a/src/detect.c +++ b/src/detect.c @@ -374,6 +374,10 @@ DetectPostInspectFileFlagsUpdate(Flow *f, const SigGroupHead *sgh, uint8_t direc flow_file_flags |= (FLOWFILE_NO_MAGIC_TS|FLOWFILE_NO_MAGIC_TC); } #endif + if (!(sgh->flags & SIG_GROUP_HEAD_HAVEFILEMIMETYPE)) { + SCLogDebug("requesting disabling mimetype for flow"); + flow_file_flags |= (FLOWFILE_NO_MIMETYPE_TS | FLOWFILE_NO_MIMETYPE_TC); + } if (!(sgh->flags & SIG_GROUP_HEAD_HAVEFILEMD5)) { SCLogDebug("requesting disabling md5 for flow"); flow_file_flags |= (FLOWFILE_NO_MD5_TS|FLOWFILE_NO_MD5_TC); diff --git a/src/detect.h b/src/detect.h index afe98582392f..ad6f68bbdeac 100644 --- a/src/detect.h +++ b/src/detect.h @@ -325,6 +325,7 @@ typedef struct DetectPort_ { #define FILE_SIG_NEED_SHA1 BIT_U16(5) #define FILE_SIG_NEED_SHA256 BIT_U16(6) #define FILE_SIG_NEED_SIZE BIT_U16(7) +#define FILE_SIG_NEED_MIMETYPE BIT_U16(8) /* Detection Engine flags */ #define DE_QUIET 0x01 /**< DE is quiet (esp for unittests) */ @@ -1493,6 +1494,7 @@ enum { #define SIG_GROUP_HEAD_HAVEFILESIZE BIT_U16(3) #define SIG_GROUP_HEAD_HAVEFILESHA1 BIT_U16(4) #define SIG_GROUP_HEAD_HAVEFILESHA256 BIT_U16(5) +#define SIG_GROUP_HEAD_HAVEFILEMIMETYPE BIT_U16(6) enum MpmBuiltinBuffers { MPMB_TCP_PKT_TS, diff --git a/src/flow.c b/src/flow.c index ef10da305ec2..ba58c4c6c665 100644 --- a/src/flow.c +++ b/src/flow.c @@ -223,6 +223,7 @@ static inline void FlowSwapFileFlags(Flow *f) SWAP_FLAGS(f->file_flags, FLOWFILE_NO_SHA1_TS, FLOWFILE_NO_SHA1_TC); SWAP_FLAGS(f->file_flags, FLOWFILE_NO_SHA256_TS, FLOWFILE_NO_SHA256_TC); SWAP_FLAGS(f->file_flags, FLOWFILE_NO_SIZE_TS, FLOWFILE_NO_SIZE_TC); + SWAP_FLAGS(f->file_flags, 
FLOWFILE_NO_MIMETYPE_TS, FLOWFILE_NO_MIMETYPE_TC); } static inline void TcpStreamFlowSwap(Flow *f) diff --git a/src/flow.h b/src/flow.h index 43e438207cc2..e02166d2829c 100644 --- a/src/flow.h +++ b/src/flow.h @@ -153,18 +153,16 @@ typedef struct AppLayerParserState_ AppLayerParserState; #define FLOWFILE_STORE_TS BIT_U16(12) #define FLOWFILE_STORE_TC BIT_U16(13) -#define FLOWFILE_NONE_TS (FLOWFILE_NO_MAGIC_TS | \ - FLOWFILE_NO_STORE_TS | \ - FLOWFILE_NO_MD5_TS | \ - FLOWFILE_NO_SHA1_TS | \ - FLOWFILE_NO_SHA256_TS| \ - FLOWFILE_NO_SIZE_TS) -#define FLOWFILE_NONE_TC (FLOWFILE_NO_MAGIC_TC | \ - FLOWFILE_NO_STORE_TC | \ - FLOWFILE_NO_MD5_TC | \ - FLOWFILE_NO_SHA1_TC | \ - FLOWFILE_NO_SHA256_TC| \ - FLOWFILE_NO_SIZE_TC) +/** no mime type tracking of files in this flow */ +#define FLOWFILE_NO_MIMETYPE_TS BIT_U16(14) +#define FLOWFILE_NO_MIMETYPE_TC BIT_U16(15) + +#define FLOWFILE_NONE_TS \ + (FLOWFILE_NO_MAGIC_TS | FLOWFILE_NO_STORE_TS | FLOWFILE_NO_MD5_TS | FLOWFILE_NO_SHA1_TS | \ + FLOWFILE_NO_SHA256_TS | FLOWFILE_NO_SIZE_TS | FLOWFILE_NO_MIMETYPE_TS) +#define FLOWFILE_NONE_TC \ + (FLOWFILE_NO_MAGIC_TC | FLOWFILE_NO_STORE_TC | FLOWFILE_NO_MD5_TC | FLOWFILE_NO_SHA1_TC | \ + FLOWFILE_NO_SHA256_TC | FLOWFILE_NO_SIZE_TC | FLOWFILE_NO_MIMETYPE_TC) #define FLOWFILE_NONE (FLOWFILE_NONE_TS|FLOWFILE_NONE_TC) #define FLOW_IS_IPV4(f) \ diff --git a/src/output-file.c b/src/output-file.c index e468f14d25c0..7b529e6d9c32 100644 --- a/src/output-file.c +++ b/src/output-file.c @@ -33,6 +33,7 @@ #include "detect-filemagic.h" #include "util-file.h" #include "util-magic.h" +#include "util-mimetype.h" #include "util-profiling.h" #include "util-validate.h" @@ -132,6 +133,10 @@ void OutputFileLogFfc(ThreadVars *tv, OutputFileLoggerThreadData *op_thread_data FilemagicThreadLookup(&op_thread_data->magic_ctx, ff); } #endif + + if (FileForceMimetype() && ff->mimetype == NULL) { + FileMimetypeLookup(ff); + } const OutputFileLogger *logger = list; const OutputLoggerThreadStore *store = 
op_thread_data->store; while (logger && store) { diff --git a/src/output-filedata.c b/src/output-filedata.c index 1e07902fdcd5..3b8ea9a63cb6 100644 --- a/src/output-filedata.c +++ b/src/output-filedata.c @@ -33,6 +33,7 @@ #include "util-validate.h" #include "util-magic.h" #include "util-path.h" +#include "util-mimetype.h" bool g_filedata_logger_enabled = false; @@ -136,6 +137,10 @@ void OutputFiledataLogFfc(ThreadVars *tv, OutputFiledataLoggerThreadData *td, Pa FilemagicThreadLookup(&td->magic_ctx, ff); } #endif + if (FileForceMimetype() && ff->mimetype == NULL) { + FileMimetypeLookup(ff); + } + if (ff->flags & FILE_STORED) { continue; } diff --git a/src/output-filestore.c b/src/output-filestore.c index b177ea2bf7f4..c41f19dfa51b 100644 --- a/src/output-filestore.c +++ b/src/output-filestore.c @@ -446,6 +446,12 @@ static OutputInitResult OutputFilestoreLogInitCtx(SCConfNode *conf) SCLogConfig("Filestore (v2) forcing magic lookup for stored files"); } + const char *force_mimetype = SCConfNodeLookupChildValue(conf, "force-mimetype"); + if (force_mimetype != NULL && SCConfValIsTrue(force_mimetype)) { + FileForceMimetypeEnable(); + SCLogConfig("forcing mimetype lookup for logged files"); + } + FileForceHashParseCfg(conf); /* The new filestore requires SHA256. 
*/ diff --git a/src/output-json-alert.c b/src/output-json-alert.c index 184370a65edc..26b1d6f81fd2 100644 --- a/src/output-json-alert.c +++ b/src/output-json-alert.c @@ -65,6 +65,7 @@ #include "util-buffer.h" #include "util-reference-config.h" #include "util-validate.h" +#include "util-mimetype.h" #include "action-globals.h" diff --git a/src/output-json-file.c b/src/output-json-file.c index 9c046b3c7d18..f3ba44367362 100644 --- a/src/output-json-file.c +++ b/src/output-json-file.c @@ -327,6 +327,12 @@ static OutputInitResult OutputFileLogInitSub(SCConfNode *conf, OutputCtx *parent SCLogConfig("forcing magic lookup for logged files"); } + const char *force_mimetype = SCConfNodeLookupChildValue(conf, "force-mimetype"); + if (force_mimetype != NULL && SCConfValIsTrue(force_mimetype)) { + FileForceMimetypeEnable(); + SCLogConfig("forcing mimetype lookup for logged files"); + } + FileForceHashParseCfg(conf); } diff --git a/src/output-json.c b/src/output-json.c index 1e04e13c49dd..3d2072d20bf2 100644 --- a/src/output-json.c +++ b/src/output-json.c @@ -137,7 +137,12 @@ void EveFileInfo(SCJsonBuilder *jb, const File *ff, const uint64_t tx_id, const if (ff->magic) SCJbSetString(jb, "magic", (char *)ff->magic); #endif + + if (ff->mimetype) + SCJbSetString(jb, "mimetype", (char *)ff->mimetype); + SCJbSetBool(jb, "gaps", ff->flags & FILE_HAS_GAPS); + switch (ff->state) { case FILE_STATE_CLOSED: JB_SET_STRING(jb, "state", "CLOSED"); diff --git a/src/util-file.c b/src/util-file.c index e7b71e7070da..824ddfebb9b6 100644 --- a/src/util-file.c +++ b/src/util-file.c @@ -55,6 +55,11 @@ static int g_file_force_filestore = 0; */ static int g_file_force_magic = 0; +/** \brief switch to force mimetype checks on all files + * regardless of the rules. + */ +static int g_file_force_mimetype = 0; + /** \brief switch to force md5 calculation on all files * regardless of the rules. 
*/ @@ -101,6 +106,12 @@ void FileForceMagicEnable(void) g_file_flow_mask |= (FLOWFILE_NO_MAGIC_TS|FLOWFILE_NO_MAGIC_TC); } +void FileForceMimetypeEnable(void) +{ + g_file_force_mimetype = 1; + g_file_flow_mask |= (FLOWFILE_NO_MIMETYPE_TS | FLOWFILE_NO_MIMETYPE_TC); +} + void FileForceMd5Enable(void) { g_file_force_md5 = 1; @@ -143,6 +154,11 @@ int FileForceMagic(void) return g_file_force_magic; } +int FileForceMimetype(void) +{ + return g_file_force_mimetype; +} + int FileForceMd5(void) { return g_file_force_md5; @@ -240,6 +256,10 @@ uint16_t FileFlowFlagsToFlags(const uint16_t flow_file_flags, uint8_t direction) if (flow_file_flags & FLOWFILE_NO_SHA256_TS) { flags |= FILE_NOSHA256; } + + if (flow_file_flags & FLOWFILE_NO_MIMETYPE_TS) { + flags |= FILE_NOMIMETYPE; + } } else { if ((flow_file_flags & (FLOWFILE_NO_STORE_TC | FLOWFILE_STORE_TC)) == FLOWFILE_NO_STORE_TC) { @@ -263,6 +283,10 @@ uint16_t FileFlowFlagsToFlags(const uint16_t flow_file_flags, uint8_t direction) if (flow_file_flags & FLOWFILE_NO_SHA256_TC) { flags |= FILE_NOSHA256; } + + if (flow_file_flags & FLOWFILE_NO_MIMETYPE_TC) { + flags |= FILE_NOMIMETYPE; + } } DEBUG_VALIDATE_BUG_ON((flags & (FILE_STORE | FILE_NOSTORE)) == (FILE_STORE | FILE_NOSTORE)); @@ -368,6 +392,15 @@ static int FilePruneFile(File *file, const StreamingBufferConfig *cfg) SCLogDebug("file->flags & FILE_NOMAGIC == true"); } #endif + if (!(file->flags & FILE_NOMIMETYPE)) { + /* need mimetype but haven't set it yet, bail out */ + if (file->mimetype == NULL) + SCReturnInt(0); + else + SCLogDebug("file->mimetype %s", file->mimetype); + } else { + SCLogDebug("file->flags & FILE_NOMIMETYPE == true"); + } uint64_t left_edge = FileDataSize(file); if (file->flags & FILE_STORE) { left_edge = MIN(left_edge,file->content_stored); @@ -579,6 +612,8 @@ static void FileFree(File *ff, const StreamingBufferConfig *sbcfg) if (ff->magic != NULL) SCFree(ff->magic); #endif + if (ff->mimetype != NULL) + SCRustCStringFree(ff->mimetype); if (ff->sb != 
NULL) { StreamingBufferFree(ff->sb, sbcfg); } @@ -913,6 +948,10 @@ static File *FileOpenFile(FileContainer *ffc, const StreamingBufferConfig *sbcfg SCLogDebug("not doing magic for this file"); ff->flags |= FILE_NOMAGIC; } + if (flags & FILE_NOMIMETYPE) { + SCLogDebug("not doing mimetype for this file"); + ff->flags |= FILE_NOMIMETYPE; + } if (flags & FILE_NOMD5) { SCLogDebug("not doing md5 for this file"); ff->flags |= FILE_NOMD5; diff --git a/src/util-file.h b/src/util-file.h index 3e42efda650d..e6cd7ac4f8fd 100644 --- a/src/util-file.h +++ b/src/util-file.h @@ -56,6 +56,7 @@ typedef struct SCMd5 SCMd5; #define FILE_STORED BIT_U16(11) #define FILE_NOTRACK BIT_U16(12) /**< track size of file */ #define FILE_USE_DETECT BIT_U16(13) /**< use content_inspected tracker */ +#define FILE_NOMIMETYPE BIT_U16(14) #define FILE_HAS_GAPS BIT_U16(15) // to be used instead of PATH_MAX which depends on the OS @@ -89,6 +90,7 @@ typedef struct File_ { #ifdef HAVE_MAGIC char *magic; #endif + char *mimetype; struct File_ *next; SCMd5 *md5_ctx; uint8_t md5[SC_MD5_LEN]; @@ -221,6 +223,9 @@ uint32_t FileReassemblyDepth(void); void FileForceMagicEnable(void); int FileForceMagic(void); +void FileForceMimetypeEnable(void); +int FileForceMimetype(void); + void FileForceMd5Enable(void); int FileForceMd5(void); diff --git a/src/util-mimetype.c b/src/util-mimetype.c new file mode 100644 index 000000000000..fa2c00aa5c72 --- /dev/null +++ b/src/util-mimetype.c @@ -0,0 +1,51 @@ +/* Copyright (C) 2025 Open Information Security Foundation + * + * You can copy, redistribute or modify this Program under the terms of + * the GNU General Public License version 2 as published by the Free + * Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +/** + * \file + * + * \author Eric Leblond + * + */ + +#include "suricata-common.h" +#include "util-file.h" +#include "util-mimetype.h" +#include "rust.h" + +#define FILE_MIMETYPE_MIN_SIZE 512 + +int FileMimetypeLookup(File *file) +{ + if (file == NULL || FileDataSize(file) == 0) { + SCReturnInt(-1); + } + + const uint8_t *data = NULL; + uint32_t data_len = 0; + uint64_t offset = 0; + + StreamingBufferGetData(file->sb, &data, &data_len, &offset); + if (offset == 0) { + if (FileDataSize(file) >= FILE_MIMETYPE_MIN_SIZE) { + file->mimetype = SCGetMimetype(data, data_len); + } else if (file->state >= FILE_STATE_CLOSED) { + file->mimetype = SCGetMimetype(data, data_len); + } + } + SCReturnInt(0); +} diff --git a/src/util-mimetype.h b/src/util-mimetype.h new file mode 100644 index 000000000000..88effd2b7d55 --- /dev/null +++ b/src/util-mimetype.h @@ -0,0 +1,30 @@ +/* Copyright (C) 2025 Open Information Security Foundation + * + * You can copy, redistribute or modify this Program under the terms of + * the GNU General Public License version 2 as published by the Free + * Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. 
+ */ + +/** + * \file + * + * \author Eric Leblond + * + */ + +#ifndef SURICATA_UTIL_MIMETYPE_H +#define SURICATA_UTIL_MIMETYPE_H + +int FileMimetypeLookup(File *file); + +#endif /* SURICATA_UTIL_MIMETYPE_H */ diff --git a/suricata.yaml.in b/suricata.yaml.in index 9b96a59ea3d5..5b49dc1e52ac 100644 --- a/suricata.yaml.in +++ b/suricata.yaml.in @@ -292,6 +292,7 @@ outputs: #custom: [subject, issuer, session_resumed, serial, fingerprint, sni, version, not_before, not_after, certificate, chain, ja3, ja3s, ja4, subjectaltname, client, client_certificate, client_chain, client_alpns, server_alpns, client_handshake, server_handshake] - files: force-magic: no # force logging magic on all logged files + force-mimetype: no # force logging mime type on all logged files # force logging of checksums, available hash functions are md5, # sha1 and sha256 #force-hash: [md5] From 0b4807bb93065f7e56906f8450fe63352f54c20d Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Sun, 6 Jul 2025 19:18:13 +0200 Subject: [PATCH 03/10] configure: add GPL inclusion flag This patch adds a flag `--enable-gpl-mimetype` to force the usage of `tree_magic_db` that is GPL licensed and will not allow dual licensing. This will allow the built Suricata to have mime type identification that is not dependent on the operating system database. 
Ticket: 7816 --- configure.ac | 9 +++++++++ rust/Cargo.toml.in | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index afe93bd8fa7e..c1ac87629d2a 100644 --- a/configure.ac +++ b/configure.ac @@ -1765,6 +1765,15 @@ fi fi + # mimetype + AC_ARG_ENABLE(gpl-mimetype, + AS_HELP_STRING([--enable-gpl-mimetype], [Embed GPL data in mimetype support [default=no]]), + [enable_gpl_mimetype="$enableval"],[enable_gpl_mimetype=no]) + AS_IF([test "x$enable_gpl_mimetype" = "xyes"], [ + WITH_GPL_DATA="\"with-gpl-data\"" + AC_SUBST(WITH_GPL_DATA) + ]) + # Napatech - Using the 3GD API AC_ARG_ENABLE(napatech, AS_HELP_STRING([--enable-napatech],[Enable Napatech Devices]), diff --git a/rust/Cargo.toml.in b/rust/Cargo.toml.in index 6ff8bc7f464c..0adad3c496f2 100644 --- a/rust/Cargo.toml.in +++ b/rust/Cargo.toml.in @@ -58,7 +58,7 @@ lru = "~0.12.5" der-parser = { version = "~9.0.0", default-features = false } kerberos-parser = { version = "~0.8.0", default-features = false } -tree_magic_mini = "~3.1.6" +tree_magic_mini = { version = "~3.1.6", features = [@WITH_GPL_DATA@] } sawp-modbus = "~0.13.1" sawp-pop3 = "~0.13.1" From dbefe7c47cd971f0b9e691191862e9e16683453d Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Mon, 14 Jul 2025 23:35:15 +0200 Subject: [PATCH 04/10] file: add HAVE_MIMETYPE feature This can be used with the requires keyword to avoid signatures load failure. 
Ticket: 7816 --- src/suricata.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/suricata.c b/src/suricata.c index f7b62c9dd2cc..101251503a92 100644 --- a/src/suricata.c +++ b/src/suricata.c @@ -807,6 +807,7 @@ static void PrintBuildInfo(void) strlcat(features, "HAVE_JA4 ", sizeof(features)); #endif strlcat(features, "HAVE_LIBJANSSON ", sizeof(features)); + strlcat(features, "HAVE_MIMETYPE ", sizeof(features)); #ifdef PROFILING strlcat(features, "PROFILING ", sizeof(features)); #endif From 6d0df64d443ad73b25084435cf6dd7a39ab2cd54 Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Sun, 6 Jul 2025 21:37:57 +0200 Subject: [PATCH 05/10] eve/alert: compute mimetype if needed As the file data may not have been inspected yet, the mimetype can be missing from the event even though it could be computed. Let's compute it before logging the file. Ticket: 7816 --- src/output-json-alert.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/output-json-alert.c b/src/output-json-alert.c index 26b1d6f81fd2..aa5f2326e90a 100644 --- a/src/output-json-alert.c +++ b/src/output-json-alert.c @@ -488,6 +488,8 @@ static void AlertAddFiles(const Packet *p, SCJsonBuilder *jb, const uint64_t tx_ isopen = true; SCJbOpenArray(jb, "files"); } + if (FileForceMimetype() && file->mimetype == NULL) + FileMimetypeLookup(file); SCJbStartObject(jb); EveFileInfo(jb, file, tx_id, file->flags); SCJbClose(jb); From e878717fd73bc1375810181787ffede499759eff Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Sun, 6 Jul 2025 20:33:31 +0200 Subject: [PATCH 06/10] doc/userguide: add information about file.mimetype Ticket: 7816 --- doc/userguide/rules/file-keywords.rst | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/doc/userguide/rules/file-keywords.rst b/doc/userguide/rules/file-keywords.rst index 91637576852a..1ad21f620cf3 100644 --- a/doc/userguide/rules/file-keywords.rst +++ b/doc/userguide/rules/file-keywords.rst @@ -127,6 +127,21 @@ here: 
https://redmine.openinfosecfoundation.org/issues/437 
``file.magic`` supports multiple buffer matching, see :doc:`multi-buffer-matching`. +file.mimetype +------------- + +Sticky buffer that matches on the MIME type guessed from the binary content of a file. + +Example:: + + file.mimetype; content:"application/vnd.microsoft.portable-executable"; + +``file.mimetype`` supports multiple buffer matching, see :doc:`multi-buffer-matching`. + +The MIME type analysis is faster than the magic analysis and the identification is also +more reproducible across different Suricata versions and operating systems. Because MIME +types are standardized, this also improves correlation with other tools. + filestore --------- From 022cbea77840e111498ab8de605bbe49ff5fa961 Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Fri, 25 Jul 2025 08:58:25 +0200 Subject: [PATCH 07/10] configure: add display about mimetype db inclusion --- configure.ac | 1 + 1 file changed, 1 insertion(+) diff --git a/configure.ac b/configure.ac index c1ac87629d2a..8306c1771b59 100644 --- a/configure.ac +++ b/configure.ac @@ -2587,6 +2587,7 @@ SURICATA_BUILD_CONF="Suricata Configuration: Detection enabled: ${enable_detection} Libmagic support: ${enable_magic} + GPL Mimetype DB inclusion: ${enable_gpl_mimetype} libjansson support: ${enable_jansson} hiredis support: ${enable_hiredis} hiredis async with libevent: ${enable_hiredis_async} From d4cc8d629214efebdf84ff40be21b7102920d852 Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Tue, 29 Jul 2025 21:53:37 +0200 Subject: [PATCH 08/10] detect/mimetype: optional build --- configure.ac | 25 ++++++++++++++++++------- src/detect-file-mimetype.c | 26 ++++++++++++++++++++++++++ src/output-file.c | 3 ++- src/output-filedata.c | 2 ++ src/output-json-alert.c | 2 ++ src/suricata.c | 2 ++ 6 files changed, 52 insertions(+), 8 deletions(-) diff --git a/configure.ac b/configure.ac index 8306c1771b59..0d0c2a37ddb1 100644 --- a/configure.ac +++ b/configure.ac @@ -1766,13 +1766,23 @@ fi # mimetype - AC_ARG_ENABLE(gpl-mimetype, - 
AS_HELP_STRING([--enable-gpl-mimetype], [Embed GPL data in mimetype support [default=no]]), - [enable_gpl_mimetype="$enableval"],[enable_gpl_mimetype=no]) - AS_IF([test "x$enable_gpl_mimetype" = "xyes"], [ - WITH_GPL_DATA="\"with-gpl-data\"" - AC_SUBST(WITH_GPL_DATA) - ]) + AC_ARG_ENABLE(mimetype, + AS_HELP_STRING([--enable-mimetype], [Enable mimetype support [default=no]]), + [enable_mimetype="$enableval"],[enable_mimetype=no]) + AS_IF([test "x$enable_mimetype" = "xyes"], [ + AC_DEFINE([HAVE_MIMETYPE],[1],(Mimetype support enabled)) + AC_ARG_ENABLE(bundled-gpl-mimetype, + AS_HELP_STRING([--disable-bundled-gpl-mimetype], [Embed GPL data in mimetype support [default=yes]]), + [enable_gpl_mimetype="$enableval"],[enable_gpl_mimetype=yes]) + AS_IF([test "x$enable_gpl_mimetype" = "xyes"], [ + WITH_GPL_DATA="\"with-gpl-data\"" + AC_SUBST(WITH_GPL_DATA) + ]) + ] + ) + if test "x$enable_gpl_mimetype" != "xyes"; then + enable_gpl_mimetype="no" + fi # Napatech - Using the 3GD API AC_ARG_ENABLE(napatech, @@ -2587,6 +2597,7 @@ SURICATA_BUILD_CONF="Suricata Configuration: Detection enabled: ${enable_detection} Libmagic support: ${enable_magic} + mimetype support: ${enable_mimetype} GPL Mimetype DB inclusion: ${enable_gpl_mimetype} libjansson support: ${enable_jansson} hiredis support: ${enable_hiredis} diff --git a/src/detect-file-mimetype.c b/src/detect-file-mimetype.c index 2a371dd24076..993e9dfc47c7 100644 --- a/src/detect-file-mimetype.c +++ b/src/detect-file-mimetype.c @@ -36,6 +36,30 @@ #include "util-profiling.h" #include "app-layer-parser.h" +#include "conf.h" + +#ifndef HAVE_MIMETYPE + +static int DetectFileMimetypeSetupNoSupport(DetectEngineCtx *de_ctx, Signature *s, const char *str) +{ + SCLogError("no libmagic support built in, needed for filemagic keyword"); + return -1; +} + +/** + * \brief Registration function for keyword: filemagic + */ +void DetectFileMimetypeRegister(void) +{ + sigmatch_table[DETECT_FILE_MIMETYPE].name = "file.mimetype"; + 
sigmatch_table[DETECT_FILE_MIMETYPE].desc = "sticky buffer to match on file mime type"; + sigmatch_table[DETECT_FILE_MIMETYPE].url = "/rules/file-keywords.html#file_mimetype"; + sigmatch_table[DETECT_FILE_MIMETYPE].Setup = DetectFileMimetypeSetupNoSupport; + sigmatch_table[DETECT_FILE_MIMETYPE].flags = SIGMATCH_NOOPT | SIGMATCH_INFO_STICKY_BUFFER; +} + +#else /* HAVE_MIMETYPE */ + static int g_file_match_list_id = 0; static int DetectFileMimetypeSetup(DetectEngineCtx *de_ctx, Signature *s, const char *str); @@ -204,3 +228,5 @@ static unsigned char DetectEngineInspectFileMimetype(DetectEngineCtx *de_ctx, } return r; } + +#endif /* HAVE_MIMETYPE */ diff --git a/src/output-file.c b/src/output-file.c index 7b529e6d9c32..5b8a313495de 100644 --- a/src/output-file.c +++ b/src/output-file.c @@ -133,10 +133,11 @@ void OutputFileLogFfc(ThreadVars *tv, OutputFileLoggerThreadData *op_thread_data FilemagicThreadLookup(&op_thread_data->magic_ctx, ff); } #endif - +#ifdef HAVE_MIMETYPE if (FileForceMimetype() && ff->mimetype == NULL) { FileMimetypeLookup(ff); } +#endif const OutputFileLogger *logger = list; const OutputLoggerThreadStore *store = op_thread_data->store; while (logger && store) { diff --git a/src/output-filedata.c b/src/output-filedata.c index 3b8ea9a63cb6..37ed310d5be9 100644 --- a/src/output-filedata.c +++ b/src/output-filedata.c @@ -137,9 +137,11 @@ void OutputFiledataLogFfc(ThreadVars *tv, OutputFiledataLoggerThreadData *td, Pa FilemagicThreadLookup(&td->magic_ctx, ff); } #endif +#ifdef HAVE_MIMETYPE if (FileForceMimetype() && ff->mimetype == NULL) { FileMimetypeLookup(ff); } +#endif if (ff->flags & FILE_STORED) { continue; diff --git a/src/output-json-alert.c b/src/output-json-alert.c index aa5f2326e90a..3a5274aaece9 100644 --- a/src/output-json-alert.c +++ b/src/output-json-alert.c @@ -488,8 +488,10 @@ static void AlertAddFiles(const Packet *p, SCJsonBuilder *jb, const uint64_t tx_ isopen = true; SCJbOpenArray(jb, "files"); } +#ifdef HAVE_MIMETYPE if 
(FileForceMimetype() && file->mimetype == NULL) FileMimetypeLookup(file); +#endif SCJbStartObject(jb); EveFileInfo(jb, file, tx_id, file->flags); SCJbClose(jb); diff --git a/src/suricata.c b/src/suricata.c index 101251503a92..7e2020683fba 100644 --- a/src/suricata.c +++ b/src/suricata.c @@ -807,7 +807,9 @@ static void PrintBuildInfo(void) strlcat(features, "HAVE_JA4 ", sizeof(features)); #endif strlcat(features, "HAVE_LIBJANSSON ", sizeof(features)); +#ifdef HAVE_MIMETYPE strlcat(features, "HAVE_MIMETYPE ", sizeof(features)); +#endif #ifdef PROFILING strlcat(features, "PROFILING ", sizeof(features)); #endif From f9b40952c1adb86a08dc66cec97300e4a15ba012 Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Tue, 29 Jul 2025 21:59:34 +0200 Subject: [PATCH 09/10] ci: add a build with mimetype for S-V --- .github/workflows/builds.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/builds.yml b/.github/workflows/builds.yml index e35078835e57..13dbaefa9673 100644 --- a/.github/workflows/builds.yml +++ b/.github/workflows/builds.yml @@ -1328,7 +1328,7 @@ jobs: - run: tar xf prep/suricata-update.tar.gz - run: tar xf prep/suricata-verify.tar.gz - run: ./autogen.sh - - run: ./configure --enable-unittests --enable-coccinelle + - run: ./configure --enable-unittests --enable-coccinelle --enable-mimetype --enable-bundled-gpl-mimetype - run: make -j ${{ env.CPUS }} - run: CONCURRENCY_LEVEL=${{ env.CPUS }} make check - run: python3 ./suricata-verify/run.py -q --debug-failed From a087e7780890dd3aaa68c018698a2386bf16cb43 Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Mon, 4 Aug 2025 23:27:40 +0200 Subject: [PATCH 10/10] configure: conditional build of mimetype Only build the tree_magic_mini crate if needed. 
--- configure.ac | 4 ++++ rust/Cargo.toml.in | 3 ++- rust/Makefile.am | 4 ++++ rust/src/lib.rs | 1 + src/util-mimetype.c | 4 ++++ 5 files changed, 15 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 0d0c2a37ddb1..4167bbb0d384 100644 --- a/configure.ac +++ b/configure.ac @@ -1778,6 +1778,10 @@ WITH_GPL_DATA="\"with-gpl-data\"" AC_SUBST(WITH_GPL_DATA) ]) + AM_CONDITIONAL([HAVE_MIMETYPE], [true]) + ], + [ + AM_CONDITIONAL([HAVE_MIMETYPE], [false]) ] ) if test "x$enable_gpl_mimetype" != "xyes"; then diff --git a/rust/Cargo.toml.in b/rust/Cargo.toml.in index 0adad3c496f2..2bc7ea5a7be9 100644 --- a/rust/Cargo.toml.in +++ b/rust/Cargo.toml.in @@ -35,6 +35,7 @@ debug = [] debug-validate = [] ja3 = [] ja4 = [] +filemimetype = ["dep:tree_magic_mini"] [dependencies] nom7 = { version="7.1", package="nom" } @@ -58,7 +59,7 @@ lru = "~0.12.5" der-parser = { version = "~9.0.0", default-features = false } kerberos-parser = { version = "~0.8.0", default-features = false } -tree_magic_mini = { version = "~3.1.6", features = [@WITH_GPL_DATA@] } +tree_magic_mini = { version = "~3.1.6", features = [@WITH_GPL_DATA@], optional = true } sawp-modbus = "~0.13.1" sawp-pop3 = "~0.13.1" diff --git a/rust/Makefile.am b/rust/Makefile.am index 49104786ae7e..b0b87f674d4f 100644 --- a/rust/Makefile.am +++ b/rust/Makefile.am @@ -39,6 +39,10 @@ if DEBUG RUST_FEATURES += debug endif +if HAVE_MIMETYPE +RUST_FEATURES += filemimetype +endif + if DEBUG_VALIDATION RUST_FEATURES += debug-validate endif diff --git a/rust/src/lib.rs b/rust/src/lib.rs index 4fe26408396e..29f30cb5abe1 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -90,6 +90,7 @@ pub mod applayer; pub mod frames; pub mod filecontainer; pub mod filetracker; +#[cfg(feature = "filemimetype")] pub mod filemimetype; pub mod kerberos; pub mod detect; diff --git a/src/util-mimetype.c b/src/util-mimetype.c index fa2c00aa5c72..bdb67ed6940e 100644 --- a/src/util-mimetype.c +++ b/src/util-mimetype.c @@ -27,6 +27,8 @@ 
#include "util-mimetype.h" #include "rust.h" +#ifdef HAVE_MIMETYPE + #define FILE_MIMETYPE_MIN_SIZE 512 int FileMimetypeLookup(File *file) @@ -49,3 +51,5 @@ int FileMimetypeLookup(File *file) } SCReturnInt(0); } + +#endif /* HAVE_MIMETYPE */