Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion application/config.json.template
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,11 @@
],
"isHostSimilarToKeywordDistanceThreshold": 2,
"suspiciousAttachmentsThreshold": 3,
"suspiciousAttachmentNamePattern": "(image|\\d{1,2})\\.[^.]{0,5}"
"suspiciousAttachmentNamePattern": "(image|\\d{1,2})\\.[^.]{0,5}",
"maxAllowedSimilarMessages": 2,
"similarMessagesWindow": 1,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

missing time unit. please add it to the name.

"similarMessageLengthIgnore": 10,
"similarMessagesWhitelist": []
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i dont think we need that, please remove.

the UX for it for maintainers would be really weird, having to bloat the config with 100 character long messages and whatnot. and then it didnt work bc of a smiley or other stuff that ends up slightly different.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All 4? or just the whitelist ?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sorry, the preview didnt turn out the way i wanted. i was referring to the message whitelist.

},
"wolframAlphaAppId": "79J52T-6239TVXHR7",
"helpSystem": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ public final class ScamBlockerConfig {
private final int isHostSimilarToKeywordDistanceThreshold;
private final int suspiciousAttachmentsThreshold;
private final String suspiciousAttachmentNamePattern;
private final int maxAllowedSimilarMessages;
private final int similarMessagesWindow;
private final int similarMessageLengthIgnore;
private final Set<String> similarMessagesWhitelist;

@JsonCreator(mode = JsonCreator.Mode.PROPERTIES)
private ScamBlockerConfig(@JsonProperty(value = "mode", required = true) Mode mode,
Expand All @@ -46,7 +50,12 @@ private ScamBlockerConfig(@JsonProperty(value = "mode", required = true) Mode mo
@JsonProperty(value = "suspiciousAttachmentsThreshold",
required = true) int suspiciousAttachmentsThreshold,
@JsonProperty(value = "suspiciousAttachmentNamePattern",
required = true) String suspiciousAttachmentNamePattern) {
required = true) String suspiciousAttachmentNamePattern,
@JsonProperty(value = "maxAllowedSimilarMessages") int maxAllowedSimilarMessages,
@JsonProperty(value = "similarMessagesWindow") int similarMessagesWindow,
@JsonProperty(value = "similarMessageLengthIgnore") int similarMessageLengthIgnore,
@JsonProperty(
value = "similarMessagesWhitelist") Set<String> similarMessagesWhitelist) {
this.mode = Objects.requireNonNull(mode);
this.reportChannelPattern = Objects.requireNonNull(reportChannelPattern);
this.botTrapChannelPattern = Objects.requireNonNull(botTrapChannelPattern);
Expand All @@ -59,6 +68,10 @@ private ScamBlockerConfig(@JsonProperty(value = "mode", required = true) Mode mo
this.suspiciousAttachmentsThreshold = suspiciousAttachmentsThreshold;
this.suspiciousAttachmentNamePattern =
Objects.requireNonNull(suspiciousAttachmentNamePattern);
this.maxAllowedSimilarMessages = maxAllowedSimilarMessages;
this.similarMessagesWindow = similarMessagesWindow;
this.similarMessageLengthIgnore = similarMessageLengthIgnore;
this.similarMessagesWhitelist = similarMessagesWhitelist;
}

/**
Expand Down Expand Up @@ -167,6 +180,43 @@ public String getSuspiciousAttachmentNamePattern() {
return suspiciousAttachmentNamePattern;
}

/**
* Gets the maximum amount of allowed messages before it gets flagged by the scam detector.
*
* @return the maximum amount of allowed messages
*/
public int getMaxAllowedSimilarMessages() {
return maxAllowedSimilarMessages;
}

/**
* Gets the window in minutes to which messages are kept in the similar messages feature.
*
* @return the window in minutes to keep the messages
*/
public int getSimilarMessagesWindow() {
return similarMessagesWindow;
}

/**
* Gets the maximum length allowed before the message gets monitored by the similar message
* feature.
*
* @return maximum length allowed
*/
public int getSimilarMessageLengthIgnore() {
return similarMessageLengthIgnore;
}

/**
* Gets the set of messages that are allowed to be spammed in the similar messages feature.
*
* @return set of whitelisted messages
*/
public Set<String> getSimilarMessagesWhitelist() {
return similarMessagesWhitelist;
}

/**
* Mode of a scam blocker. Controls which actions it takes when detecting scam.
*/
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package org.togetherjava.tjbot.features.moderation.scam;


import java.time.Instant;
import java.util.Objects;

/**
* Information about a message, used to detect spam of the same message by the same user in
* different channels.
*
* @param userId the id of the user
* @param channelId the channel where the message was posted
* @param messageHash the hash of the message
* @param timestamp when the message was posted
*/
public record MessageInfo(long userId, long channelId, String messageHash, Instant timestamp) {

@Override
public boolean equals(Object other) {
return other instanceof MessageInfo message && this.userId == message.userId
&& this.channelId == message.channelId;
}

@Override
public int hashCode() {
return Objects.hash(userId, channelId);
}
Comment on lines +18 to +27
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i find that quite confusing, not what someone using this record would expect.
it says that two completely different messages posted in the same channel by the same author are considered identical MessageInfo.

if you need this to determine duplicates in ur Set it would probably be better to go for something like record MessageLocation(long userId, long channelId) {} and then go for Map<MessageLocation, MessageInfo> instead.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm ill try that

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So I tried and the problem is that by using a map it just makes the code less readable.
Do you have another alternative ? I could maybe not use a record.

}
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,7 @@

import org.togetherjava.tjbot.config.Config;
import org.togetherjava.tjbot.config.ScamBlockerConfig;
import org.togetherjava.tjbot.features.MessageReceiverAdapter;
import org.togetherjava.tjbot.features.UserInteractionType;
import org.togetherjava.tjbot.features.UserInteractor;
import org.togetherjava.tjbot.features.*;
import org.togetherjava.tjbot.features.componentids.ComponentIdGenerator;
import org.togetherjava.tjbot.features.componentids.ComponentIdInteractor;
import org.togetherjava.tjbot.features.moderation.ModerationAction;
Expand All @@ -38,11 +36,8 @@
import org.togetherjava.tjbot.logging.LogMarkers;

import java.awt.Color;
import java.util.Collection;
import java.util.EnumSet;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.*;
import java.util.concurrent.TimeUnit;
import java.util.function.Consumer;
import java.util.function.Predicate;
import java.util.function.UnaryOperator;
Expand All @@ -55,7 +50,7 @@
* If scam is detected, depending on the configuration, the blockers actions range from deleting the
* message and banning the author to just logging the message for auditing.
*/
public final class ScamBlocker extends MessageReceiverAdapter implements UserInteractor {
public final class ScamBlocker extends MessageReceiverAdapter implements UserInteractor, Routine {
private static final Logger logger = LoggerFactory.getLogger(ScamBlocker.class);
private static final Color AMBIENT_COLOR = Color.decode("#CFBFF5");
private static final Set<ScamBlockerConfig.Mode> MODES_WITH_IMMEDIATE_DELETION =
Expand All @@ -72,8 +67,8 @@ public final class ScamBlocker extends MessageReceiverAdapter implements UserInt
private final ModerationActionsStore actionsStore;
private final ScamHistoryStore scamHistoryStore;
private final Predicate<String> hasRequiredRole;

private final ComponentIdInteractor componentIdInteractor;
private final SimilarMessagesDetector similarMessagesDetector;

/**
* Creates a new listener to receive all message sent in any channel.
Expand Down Expand Up @@ -103,6 +98,7 @@ public ScamBlocker(ModerationActionsStore actionsStore, ScamHistoryStore scamHis
hasRequiredRole = Pattern.compile(config.getSoftModerationRolePattern()).asMatchPredicate();

componentIdInteractor = new ComponentIdInteractor(getInteractionType(), getName());
similarMessagesDetector = new SimilarMessagesDetector(config.getScamBlocker());
}

@Override
Expand Down Expand Up @@ -141,6 +137,10 @@ public void onMessageReceived(MessageReceivedEvent event) {
isSafe = false;
}

if (isSafe && similarMessagesDetector.doSimilarMessageCheck(event)) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

doSimilarMessageCheck should be renamed. maybe hasPostedSimilarMessageTooOften(event)

if you dont want it to start with a question-word bc of the side effects, maybe handleHasPostedSimilarMessagesTooOften(event).

but the name should clarify what the boolean it returns means and how it fits into the logic of ScamBlocker

isSafe = false;
}

if (isSafe) {
return;
}
Expand All @@ -153,6 +153,16 @@ public void onMessageReceived(MessageReceivedEvent event) {
takeAction(event);
}

@Override
public Schedule createSchedule() {
return new Schedule(ScheduleMode.FIXED_RATE, 1, 1, TimeUnit.MINUTES);
}

@Override
public void runRoutine(JDA jda) {
similarMessagesDetector.runRoutine();
}

private void takeActionWasAlreadyReported(MessageReceivedEvent event) {
// The user recently send the same scam already, and that was already reported and handled
addScamToHistory(event);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import org.togetherjava.tjbot.db.generated.tables.records.ScamHistoryRecord;
import org.togetherjava.tjbot.features.utils.Hashing;

import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.time.Instant;
import java.util.Collection;
Expand Down Expand Up @@ -138,8 +137,7 @@ public void deleteHistoryOlderThan(Instant olderThan) {
* @return a text representation of the hash
*/
public static String hashMessageContent(Message message) {
return Hashing.bytesToHex(Hashing.hash(HASH_METHOD,
message.getContentRaw().getBytes(StandardCharsets.UTF_8)));
return Hashing.bytesToHex(Hashing.hashUTF8(HASH_METHOD, message.getContentRaw()));
}

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
package org.togetherjava.tjbot.features.moderation.scam;

import net.dv8tion.jda.api.entities.Message;
import net.dv8tion.jda.api.events.message.MessageReceivedEvent;

import org.togetherjava.tjbot.config.ScamBlockerConfig;
import org.togetherjava.tjbot.features.utils.Hashing;

import java.time.Instant;
import java.time.temporal.ChronoUnit;
import java.util.HashSet;
import java.util.Set;
import java.util.stream.Collectors;

/**
* Class which tries to detect scams by monitoring similar messages.
*/
public class SimilarMessagesDetector {
private static final String HASH_METHOD = "SHA";

private final ScamBlockerConfig scamBlockerConfig;
private final Set<MessageInfo> messageCache;
private final Set<Long> alreadyFlaggedUsers;

/**
* Creates an instance of this class by using the given config.
*
* @param scamBlockerConfig the scam config
*/
public SimilarMessagesDetector(ScamBlockerConfig scamBlockerConfig) {
this.scamBlockerConfig = scamBlockerConfig;
this.messageCache = new HashSet<>();
this.alreadyFlaggedUsers = new HashSet<>();
}

private boolean shouldIgnore(Message message) {
if (!message.getAttachments().isEmpty()) {
return false;
}
if (message.getContentRaw().length() <= scamBlockerConfig.getSimilarMessageLengthIgnore()) {
return true;
}
return scamBlockerConfig.getSimilarMessagesWhitelist()
.contains(message.getContentRaw().toLowerCase());
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

.toLowerCase needs Locale.US

}

private MessageInfo addToMessageCache(MessageReceivedEvent event) {
long userId = event.getAuthor().getIdLong();
long channelId = event.getChannel().getIdLong();
String messageHash = getHash(event.getMessage());
Instant timestamp = event.getMessage().getTimeCreated().toInstant();
MessageInfo messageInfo = new MessageInfo(userId, channelId, messageHash, timestamp);
messageCache.add(messageInfo);
return messageInfo;
Comment on lines +47 to +54
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

id still refactor this and move all the stuff dealing with MessageInfo into the record. so:

record MessageInfo(...) {
  static MessageInfo fromMessageEvent(MessageReceivedEvent event) {...}
}

then this addToMessageCache method becomes obsolete and can be replaced with just messageCache.add(MessageInfo.fromMessageEvent(event)); at the call-site

the hash creation would then also move into the records fromMessageEvent method (or as private static helper in the record)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't quite like adding a unnecessary dependencies to a record, and even less logic like how to create a hash or how to create it from an event imo.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mh... but its a factory to create instances of this record. so imo this is exactly the place it should be. what do others here think?

}

private String getHash(Message message) {
String wholeText = message.getContentRaw() + message.getAttachments()
.stream()
.map(Message.Attachment::getFileName)
.collect(Collectors.joining());
return Hashing.bytesToHex(Hashing.hashUTF8(HASH_METHOD, wholeText));
}

private boolean hasPostedTooManySimilarMessages(long userId, String messageHash) {
long similarMessageCount = messageCache.stream()
.filter(m -> m.userId() == userId && m.messageHash().equals(messageHash)
&& !isObsolete(m))
.count();
return similarMessageCount > scamBlockerConfig.getMaxAllowedSimilarMessages();
}

private boolean isObsolete(MessageInfo messageInfo) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

rename to isOutdated or isExpired

return messageInfo.timestamp()
.plus(scamBlockerConfig.getSimilarMessagesWindow(), ChronoUnit.MINUTES)
.isBefore(Instant.now());
}

/**
* Stores message data and if many messages of same author, different channel and same content
* is posted several times, returns true.
*
* @param event the message event
* @return true if the user spammed the message in several channels, false otherwise
*/
public boolean doSimilarMessageCheck(MessageReceivedEvent event) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could u do some reordering so the flow reads top to bottom. this method would then be at the top, as entry-point. and the private methods then below it in the order they are used. the runRoutine can stay at the very bottom, i guess, since its a detached flow.

long userId = event.getAuthor().getIdLong();
if (alreadyFlaggedUsers.contains(userId)) {
return true;
}
if (shouldIgnore(event.getMessage())) {
return false;
}
String hash = addToMessageCache(event).messageHash();
if (hasPostedTooManySimilarMessages(userId, hash)) {
alreadyFlaggedUsers.add(userId);
return true;
} else {
return false;
Comment on lines +88 to +99
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i am not sure if that alreadyFlaggedUsers user thing really works.. the bot has a really long uptime usually, sometimes many months.
the way i read the code is that if a user got flagged once, they will then be immune to this check until the bot is restarted again. u could argue that its probably not an issue in practice but it smells a bit. you would need a time based cleanup for alreadyFlaggedUsers as well.

at which point u have like 30 lines of code dealing with what caffeeine gets done in one line, i guess.

private final Cache<String, Instant> userIdToAskedAtCache = Caffeine.newBuilder()
  .maximumSize(1_000)
  .expireAfterWrite(Duration.of(10, ChronoUnit.SECONDS))
  .build();

the interface is essentially that of a Map, so you have various get and put variants. internally its a LRU map, so it kicks out the entries that werent touched the longest when it reaches the max limit.

Copy link
Contributor Author

@Alathreon Alathreon Aug 5, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

how do i make it a set ? if the cache already handles he cleanup, then storing the instant is pointless.
edit: i saw your other comment, i'll see that

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

caffeine only has a Map. sometimes in the past we also only needed a Set and then mapped it to something. for example to itself, i.e. key == value.

}
}

/**
* Has to be called often to clear the cache.
*/
public void runRoutine() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

rename to clearCache or cleanupCache or something.

messageCache.removeIf(this::isObsolete);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -57,4 +57,16 @@ public static byte[] hash(String method, byte[] data) {
throw new IllegalStateException("Hash method must be supported", e);
}
}

/**
* Hashes the given UTF 8 text using the given method, see {@link Hashing#hash(String, byte[])}.
*
* @param method the method to use for hashing, must be supported by {@link MessageDigest}, e.g.
* {@code "SHA"}
* @param text the UTF 8 text to hash
* @return the computed hash
*/
public static byte[] hashUTF8(String method, String text) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NIT: should probably be renamed into hashUtf8 for better camelCase readability

return hash(method, text.getBytes(StandardCharsets.UTF_8));
}
}
Loading