From e800cde5b460bbeb541f4a4abba3933d6bc133b8 Mon Sep 17 00:00:00 2001 From: Alathreon Date: Sun, 3 Aug 2025 23:52:04 +0200 Subject: [PATCH 1/2] [feature/handle-similar-messages-as-scam] Added a method to hash strings and refactored existing code --- .../features/moderation/scam/ScamHistoryStore.java | 4 +--- .../togetherjava/tjbot/features/utils/Hashing.java | 12 ++++++++++++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/application/src/main/java/org/togetherjava/tjbot/features/moderation/scam/ScamHistoryStore.java b/application/src/main/java/org/togetherjava/tjbot/features/moderation/scam/ScamHistoryStore.java index 3348d4ee5b..5f4e7cdb4f 100644 --- a/application/src/main/java/org/togetherjava/tjbot/features/moderation/scam/ScamHistoryStore.java +++ b/application/src/main/java/org/togetherjava/tjbot/features/moderation/scam/ScamHistoryStore.java @@ -7,7 +7,6 @@ import org.togetherjava.tjbot.db.generated.tables.records.ScamHistoryRecord; import org.togetherjava.tjbot.features.utils.Hashing; -import java.nio.charset.StandardCharsets; import java.time.Duration; import java.time.Instant; import java.util.Collection; @@ -138,8 +137,7 @@ public void deleteHistoryOlderThan(Instant olderThan) { * @return a text representation of the hash */ public static String hashMessageContent(Message message) { - return Hashing.bytesToHex(Hashing.hash(HASH_METHOD, - message.getContentRaw().getBytes(StandardCharsets.UTF_8))); + return Hashing.bytesToHex(Hashing.hashUTF8(HASH_METHOD, message.getContentRaw())); } /** diff --git a/application/src/main/java/org/togetherjava/tjbot/features/utils/Hashing.java b/application/src/main/java/org/togetherjava/tjbot/features/utils/Hashing.java index 245f942783..551111921c 100644 --- a/application/src/main/java/org/togetherjava/tjbot/features/utils/Hashing.java +++ b/application/src/main/java/org/togetherjava/tjbot/features/utils/Hashing.java @@ -57,4 +57,16 @@ public static byte[] hash(String method, byte[] data) { throw new IllegalStateException("Hash method must be supported", e); } } + + /** + * Hashes the given UTF 8 text using the given method, see {@link Hashing#hash(String, byte[])}. + * + * @param method the method to use for hashing, must be supported by {@link MessageDigest}, e.g. + * {@code "SHA"} + * @param text the UTF 8 text to hash + * @return the computed hash + */ + public static byte[] hashUTF8(String method, String text) { + return hash(method, text.getBytes(StandardCharsets.UTF_8)); + } } From 70375f7d9f5719c327c9d4295bd3a30a64682655 Mon Sep 17 00:00:00 2001 From: Alathreon Date: Sun, 3 Aug 2025 23:52:23 +0200 Subject: [PATCH 2/2] [feature/handle-similar-messages-as-scam] --- application/config.json.template | 6 +- .../tjbot/config/ScamBlockerConfig.java | 52 ++++++++- .../features/moderation/scam/MessageInfo.java | 28 +++++ .../features/moderation/scam/ScamBlocker.java | 30 +++-- .../scam/SimilarMessagesDetector.java | 109 ++++++++++++++++++ 5 files changed, 213 insertions(+), 12 deletions(-) create mode 100644 application/src/main/java/org/togetherjava/tjbot/features/moderation/scam/MessageInfo.java create mode 100644 application/src/main/java/org/togetherjava/tjbot/features/moderation/scam/SimilarMessagesDetector.java diff --git a/application/config.json.template b/application/config.json.template index d1ec6559f3..9a4829a285 100644 --- a/application/config.json.template +++ b/application/config.json.template @@ -89,7 +89,11 @@ ], "isHostSimilarToKeywordDistanceThreshold": 2, "suspiciousAttachmentsThreshold": 3, - "suspiciousAttachmentNamePattern": "(image|\\d{1,2})\\.[^.]{0,5}" + "suspiciousAttachmentNamePattern": "(image|\\d{1,2})\\.[^.]{0,5}", + "maxAllowedSimilarMessages": 2, + "similarMessagesWindow": 1, + "similarMessageLengthIgnore": 10, + "similarMessagesWhitelist": [] }, "wolframAlphaAppId": "79J52T-6239TVXHR7", "helpSystem": { diff --git a/application/src/main/java/org/togetherjava/tjbot/config/ScamBlockerConfig.java b/application/src/main/java/org/togetherjava/tjbot/config/ScamBlockerConfig.java index b93119022e..640f37af81 100644 --- a/application/src/main/java/org/togetherjava/tjbot/config/ScamBlockerConfig.java +++ b/application/src/main/java/org/togetherjava/tjbot/config/ScamBlockerConfig.java @@ -26,6 +26,10 @@ public final class ScamBlockerConfig { private final int isHostSimilarToKeywordDistanceThreshold; private final int suspiciousAttachmentsThreshold; private final String suspiciousAttachmentNamePattern; + private final int maxAllowedSimilarMessages; + private final int similarMessagesWindow; + private final int similarMessageLengthIgnore; + private final Set similarMessagesWhitelist; @JsonCreator(mode = JsonCreator.Mode.PROPERTIES) private ScamBlockerConfig(@JsonProperty(value = "mode", required = true) Mode mode, @@ -46,7 +50,12 @@ private ScamBlockerConfig(@JsonProperty(value = "mode", required = true) Mode mo @JsonProperty(value = "suspiciousAttachmentsThreshold", required = true) int suspiciousAttachmentsThreshold, @JsonProperty(value = "suspiciousAttachmentNamePattern", - required = true) String suspiciousAttachmentNamePattern) { + required = true) String suspiciousAttachmentNamePattern, + @JsonProperty(value = "maxAllowedSimilarMessages") int maxAllowedSimilarMessages, + @JsonProperty(value = "similarMessagesWindow") int similarMessagesWindow, + @JsonProperty(value = "similarMessageLengthIgnore") int similarMessageLengthIgnore, + @JsonProperty( + value = "similarMessagesWhitelist") Set similarMessagesWhitelist) { this.mode = Objects.requireNonNull(mode); this.reportChannelPattern = Objects.requireNonNull(reportChannelPattern); this.botTrapChannelPattern = Objects.requireNonNull(botTrapChannelPattern); @@ -59,6 +68,10 @@ private ScamBlockerConfig(@JsonProperty(value = "mode", required = true) Mode mo this.suspiciousAttachmentsThreshold = suspiciousAttachmentsThreshold; this.suspiciousAttachmentNamePattern = Objects.requireNonNull(suspiciousAttachmentNamePattern); + this.maxAllowedSimilarMessages = maxAllowedSimilarMessages; + this.similarMessagesWindow = similarMessagesWindow; + this.similarMessageLengthIgnore = similarMessageLengthIgnore; + this.similarMessagesWhitelist = similarMessagesWhitelist; } /** @@ -167,6 +180,43 @@ public String getSuspiciousAttachmentNamePattern() { return suspiciousAttachmentNamePattern; } + /** + * Gets the maximum amount of allowed messages before it gets flagged by the scam detector. + * + * @return the maximum amount of allowed messages + */ + public int getMaxAllowedSimilarMessages() { + return maxAllowedSimilarMessages; + } + + /** + * Gets the window in minutes to which messages are kept in the similar messages feature. + * + * @return the window in minutes to keep the messages + */ + public int getSimilarMessagesWindow() { + return similarMessagesWindow; + } + + /** + * Gets the maximum length allowed before the message gets monitored by the similar message + * feature. + * + * @return maximum length allowed + */ + public int getSimilarMessageLengthIgnore() { + return similarMessageLengthIgnore; + } + + /** + * Gets the set of messages that are allowed to be spammed in the similar messages feature. + * + * @return set of whitelisted messages + */ + public Set getSimilarMessagesWhitelist() { + return similarMessagesWhitelist; + } + /** * Mode of a scam blocker. Controls which actions it takes when detecting scam. */ diff --git a/application/src/main/java/org/togetherjava/tjbot/features/moderation/scam/MessageInfo.java b/application/src/main/java/org/togetherjava/tjbot/features/moderation/scam/MessageInfo.java new file mode 100644 index 0000000000..822af637f7 --- /dev/null +++ b/application/src/main/java/org/togetherjava/tjbot/features/moderation/scam/MessageInfo.java @@ -0,0 +1,28 @@ +package org.togetherjava.tjbot.features.moderation.scam; + + +import java.time.Instant; +import java.util.Objects; + +/** + * Information about a message, used to detect spam of the same message by the same user in + * different channels. + * + * @param userId the id of the user + * @param channelId the channel where the message was posted + * @param messageHash the hash of the message + * @param timestamp when the message was posted + */ +public record MessageInfo(long userId, long channelId, String messageHash, Instant timestamp) { + + @Override + public boolean equals(Object other) { + return other instanceof MessageInfo message && this.userId == message.userId + && this.channelId == message.channelId; + } + + @Override + public int hashCode() { + return Objects.hash(userId, channelId); + } +} diff --git a/application/src/main/java/org/togetherjava/tjbot/features/moderation/scam/ScamBlocker.java b/application/src/main/java/org/togetherjava/tjbot/features/moderation/scam/ScamBlocker.java index 730d7eef14..e0d1e868ff 100644 --- a/application/src/main/java/org/togetherjava/tjbot/features/moderation/scam/ScamBlocker.java +++ b/application/src/main/java/org/togetherjava/tjbot/features/moderation/scam/ScamBlocker.java @@ -25,9 +25,7 @@ import org.togetherjava.tjbot.config.Config; import org.togetherjava.tjbot.config.ScamBlockerConfig; -import org.togetherjava.tjbot.features.MessageReceiverAdapter; -import org.togetherjava.tjbot.features.UserInteractionType; -import org.togetherjava.tjbot.features.UserInteractor; +import org.togetherjava.tjbot.features.*; import org.togetherjava.tjbot.features.componentids.ComponentIdGenerator; import org.togetherjava.tjbot.features.componentids.ComponentIdInteractor; import org.togetherjava.tjbot.features.moderation.ModerationAction; @@ -38,11 +36,8 @@ import org.togetherjava.tjbot.logging.LogMarkers; import java.awt.Color; -import java.util.Collection; -import java.util.EnumSet; -import java.util.List; -import java.util.Optional; -import java.util.Set; +import java.util.*; +import java.util.concurrent.TimeUnit; import java.util.function.Consumer; import java.util.function.Predicate; import java.util.function.UnaryOperator; @@ -55,7 +50,7 @@ * If scam is detected, depending on the configuration, the blockers actions range from deleting the * message and banning the author to just logging the message for auditing. */ -public final class ScamBlocker extends MessageReceiverAdapter implements UserInteractor { +public final class ScamBlocker extends MessageReceiverAdapter implements UserInteractor, Routine { private static final Logger logger = LoggerFactory.getLogger(ScamBlocker.class); private static final Color AMBIENT_COLOR = Color.decode("#CFBFF5"); private static final Set MODES_WITH_IMMEDIATE_DELETION = @@ -72,8 +67,8 @@ public final class ScamBlocker extends MessageReceiverAdapter implements UserInt private final ModerationActionsStore actionsStore; private final ScamHistoryStore scamHistoryStore; private final Predicate hasRequiredRole; - private final ComponentIdInteractor componentIdInteractor; + private final SimilarMessagesDetector similarMessagesDetector; /** * Creates a new listener to receive all message sent in any channel. @@ -103,6 +98,7 @@ public ScamBlocker(ModerationActionsStore actionsStore, ScamHistoryStore scamHis hasRequiredRole = Pattern.compile(config.getSoftModerationRolePattern()).asMatchPredicate(); componentIdInteractor = new ComponentIdInteractor(getInteractionType(), getName()); + similarMessagesDetector = new SimilarMessagesDetector(config.getScamBlocker()); } @Override @@ -141,6 +137,10 @@ public void onMessageReceived(MessageReceivedEvent event) { isSafe = false; } + if (isSafe && similarMessagesDetector.doSimilarMessageCheck(event)) { + isSafe = false; + } + if (isSafe) { return; } @@ -153,6 +153,16 @@ public void onMessageReceived(MessageReceivedEvent event) { takeAction(event); } + @Override + public Schedule createSchedule() { + return new Schedule(ScheduleMode.FIXED_RATE, 1, 1, TimeUnit.MINUTES); + } + + @Override + public void runRoutine(JDA jda) { + similarMessagesDetector.runRoutine(); + } + private void takeActionWasAlreadyReported(MessageReceivedEvent event) { // The user recently send the same scam already, and that was already reported and handled addScamToHistory(event); diff --git a/application/src/main/java/org/togetherjava/tjbot/features/moderation/scam/SimilarMessagesDetector.java b/application/src/main/java/org/togetherjava/tjbot/features/moderation/scam/SimilarMessagesDetector.java new file mode 100644 index 0000000000..c486b4701d --- /dev/null +++ b/application/src/main/java/org/togetherjava/tjbot/features/moderation/scam/SimilarMessagesDetector.java @@ -0,0 +1,109 @@ +package org.togetherjava.tjbot.features.moderation.scam; + +import net.dv8tion.jda.api.entities.Message; +import net.dv8tion.jda.api.events.message.MessageReceivedEvent; + +import org.togetherjava.tjbot.config.ScamBlockerConfig; +import org.togetherjava.tjbot.features.utils.Hashing; + +import java.time.Instant; +import java.time.temporal.ChronoUnit; +import java.util.HashSet; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * Class which tries to detect scams by monitoring similar messages. + */ +public class SimilarMessagesDetector { + private static final String HASH_METHOD = "SHA"; + + private final ScamBlockerConfig scamBlockerConfig; + private final Set messageCache; + private final Set alreadyFlaggedUsers; + + /** + * Creates an instance of this class by using the given config. + * + * @param scamBlockerConfig the scam config + */ + public SimilarMessagesDetector(ScamBlockerConfig scamBlockerConfig) { + this.scamBlockerConfig = scamBlockerConfig; + this.messageCache = new HashSet<>(); + this.alreadyFlaggedUsers = new HashSet<>(); + } + + private boolean shouldIgnore(Message message) { + if (!message.getAttachments().isEmpty()) { + return false; + } + if (message.getContentRaw().length() <= scamBlockerConfig.getSimilarMessageLengthIgnore()) { + return true; + } + return scamBlockerConfig.getSimilarMessagesWhitelist() + .contains(message.getContentRaw().toLowerCase()); + } + + private MessageInfo addToMessageCache(MessageReceivedEvent event) { + long userId = event.getAuthor().getIdLong(); + long channelId = event.getChannel().getIdLong(); + String messageHash = getHash(event.getMessage()); + Instant timestamp = event.getMessage().getTimeCreated().toInstant(); + MessageInfo messageInfo = new MessageInfo(userId, channelId, messageHash, timestamp); + messageCache.add(messageInfo); + return messageInfo; + } + + private String getHash(Message message) { + String wholeText = message.getContentRaw() + message.getAttachments() + .stream() + .map(Message.Attachment::getFileName) + .collect(Collectors.joining()); + return Hashing.bytesToHex(Hashing.hashUTF8(HASH_METHOD, wholeText)); + } + + private boolean hasPostedTooManySimilarMessages(long userId, String messageHash) { + long similarMessageCount = messageCache.stream() + .filter(m -> m.userId() == userId && m.messageHash().equals(messageHash) + && !isObsolete(m)) + .count(); + return similarMessageCount > scamBlockerConfig.getMaxAllowedSimilarMessages(); + } + + private boolean isObsolete(MessageInfo messageInfo) { + return messageInfo.timestamp() + .plus(scamBlockerConfig.getSimilarMessagesWindow(), ChronoUnit.MINUTES) + .isBefore(Instant.now()); + } + + /** + * Stores message data and if many messages of same author, different channel and same content + * is posted several times, returns true. + * + * @param event the message event + * @return true if the user spammed the message in several channels, false otherwise + */ + public boolean doSimilarMessageCheck(MessageReceivedEvent event) { + long userId = event.getAuthor().getIdLong(); + if (alreadyFlaggedUsers.contains(userId)) { + return true; + } + if (shouldIgnore(event.getMessage())) { + return false; + } + String hash = addToMessageCache(event).messageHash(); + if (hasPostedTooManySimilarMessages(userId, hash)) { + alreadyFlaggedUsers.add(userId); + return true; + } else { + return false; + } + } + + /** + * Has to be called often to clear the cache. + */ + public void runRoutine() { + messageCache.removeIf(this::isObsolete); + } +}