-
-
Notifications
You must be signed in to change notification settings - Fork 101
feature/handle-similar-messages-as-scam #1292
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -89,7 +89,11 @@ | |
], | ||
"isHostSimilarToKeywordDistanceThreshold": 2, | ||
"suspiciousAttachmentsThreshold": 3, | ||
"suspiciousAttachmentNamePattern": "(image|\\d{1,2})\\.[^.]{0,5}" | ||
"suspiciousAttachmentNamePattern": "(image|\\d{1,2})\\.[^.]{0,5}", | ||
"maxAllowedSimilarMessages": 2, | ||
"similarMessagesWindow": 1, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. missing time unit. please add it to the name. |
||
"similarMessageLengthIgnore": 10, | ||
"similarMessagesWhitelist": [] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i dont think we need that, please remove. the UX for it for maintainers would be really weird, having to bloat the config with 100 character long messages and whatnot. and then it didnt work bc of a smiley or other stuff that ends up slightly different. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. All 4? or just the whitelist ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sorry, the preview didnt turn out the way i wanted. i was referring to the message whitelist. |
||
}, | ||
"wolframAlphaAppId": "79J52T-6239TVXHR7", | ||
"helpSystem": { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
package org.togetherjava.tjbot.features.moderation.scam; | ||
|
||
|
||
import java.time.Instant; | ||
import java.util.Objects; | ||
|
||
/** | ||
* Information about a message, used to detect spam of the same message by the same user in | ||
* different channels. | ||
* | ||
* @param userId the id of the user | ||
* @param channelId the channel where the message was posted | ||
* @param messageHash the hash of the message | ||
* @param timestamp when the message was posted | ||
*/ | ||
public record MessageInfo(long userId, long channelId, String messageHash, Instant timestamp) { | ||
|
||
@Override | ||
public boolean equals(Object other) { | ||
return other instanceof MessageInfo message && this.userId == message.userId | ||
&& this.channelId == message.channelId; | ||
} | ||
|
||
@Override | ||
public int hashCode() { | ||
return Objects.hash(userId, channelId); | ||
} | ||
Comment on lines
+18
to
+27
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i find that quite confusing, not what someone using this record would expect. if you need this to determine duplicates in ur There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hmm ill try that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So I tried and the problem is that by using a map it just makes the code less readable. |
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,9 +25,7 @@ | |
|
||
import org.togetherjava.tjbot.config.Config; | ||
import org.togetherjava.tjbot.config.ScamBlockerConfig; | ||
import org.togetherjava.tjbot.features.MessageReceiverAdapter; | ||
import org.togetherjava.tjbot.features.UserInteractionType; | ||
import org.togetherjava.tjbot.features.UserInteractor; | ||
import org.togetherjava.tjbot.features.*; | ||
import org.togetherjava.tjbot.features.componentids.ComponentIdGenerator; | ||
import org.togetherjava.tjbot.features.componentids.ComponentIdInteractor; | ||
import org.togetherjava.tjbot.features.moderation.ModerationAction; | ||
|
@@ -38,11 +36,8 @@ | |
import org.togetherjava.tjbot.logging.LogMarkers; | ||
|
||
import java.awt.Color; | ||
import java.util.Collection; | ||
import java.util.EnumSet; | ||
import java.util.List; | ||
import java.util.Optional; | ||
import java.util.Set; | ||
import java.util.*; | ||
import java.util.concurrent.TimeUnit; | ||
import java.util.function.Consumer; | ||
import java.util.function.Predicate; | ||
import java.util.function.UnaryOperator; | ||
|
@@ -55,7 +50,7 @@ | |
* If scam is detected, depending on the configuration, the blockers actions range from deleting the | ||
* message and banning the author to just logging the message for auditing. | ||
*/ | ||
public final class ScamBlocker extends MessageReceiverAdapter implements UserInteractor { | ||
public final class ScamBlocker extends MessageReceiverAdapter implements UserInteractor, Routine { | ||
private static final Logger logger = LoggerFactory.getLogger(ScamBlocker.class); | ||
private static final Color AMBIENT_COLOR = Color.decode("#CFBFF5"); | ||
private static final Set<ScamBlockerConfig.Mode> MODES_WITH_IMMEDIATE_DELETION = | ||
|
@@ -72,8 +67,8 @@ public final class ScamBlocker extends MessageReceiverAdapter implements UserInt | |
private final ModerationActionsStore actionsStore; | ||
private final ScamHistoryStore scamHistoryStore; | ||
private final Predicate<String> hasRequiredRole; | ||
|
||
private final ComponentIdInteractor componentIdInteractor; | ||
private final SimilarMessagesDetector similarMessagesDetector; | ||
|
||
/** | ||
* Creates a new listener to receive all message sent in any channel. | ||
|
@@ -103,6 +98,7 @@ public ScamBlocker(ModerationActionsStore actionsStore, ScamHistoryStore scamHis | |
hasRequiredRole = Pattern.compile(config.getSoftModerationRolePattern()).asMatchPredicate(); | ||
|
||
componentIdInteractor = new ComponentIdInteractor(getInteractionType(), getName()); | ||
similarMessagesDetector = new SimilarMessagesDetector(config.getScamBlocker()); | ||
} | ||
|
||
@Override | ||
|
@@ -141,6 +137,10 @@ public void onMessageReceived(MessageReceivedEvent event) { | |
isSafe = false; | ||
} | ||
|
||
if (isSafe && similarMessagesDetector.doSimilarMessageCheck(event)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
if you dont want it to start with a question-word bc of the side effects, maybe but the name should clarify what the |
||
isSafe = false; | ||
} | ||
|
||
if (isSafe) { | ||
return; | ||
} | ||
|
@@ -153,6 +153,16 @@ public void onMessageReceived(MessageReceivedEvent event) { | |
takeAction(event); | ||
} | ||
|
||
@Override | ||
public Schedule createSchedule() { | ||
return new Schedule(ScheduleMode.FIXED_RATE, 1, 1, TimeUnit.MINUTES); | ||
} | ||
|
||
@Override | ||
public void runRoutine(JDA jda) { | ||
similarMessagesDetector.runRoutine(); | ||
} | ||
|
||
private void takeActionWasAlreadyReported(MessageReceivedEvent event) { | ||
// The user recently send the same scam already, and that was already reported and handled | ||
addScamToHistory(event); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
package org.togetherjava.tjbot.features.moderation.scam; | ||
|
||
import net.dv8tion.jda.api.entities.Message; | ||
import net.dv8tion.jda.api.events.message.MessageReceivedEvent; | ||
|
||
import org.togetherjava.tjbot.config.ScamBlockerConfig; | ||
import org.togetherjava.tjbot.features.utils.Hashing; | ||
|
||
import java.time.Instant; | ||
import java.time.temporal.ChronoUnit; | ||
import java.util.HashSet; | ||
import java.util.Set; | ||
import java.util.stream.Collectors; | ||
|
||
/** | ||
* Class which tries to detect scams by monitoring similar messages. | ||
*/ | ||
public class SimilarMessagesDetector { | ||
private static final String HASH_METHOD = "SHA"; | ||
|
||
private final ScamBlockerConfig scamBlockerConfig; | ||
private final Set<MessageInfo> messageCache; | ||
private final Set<Long> alreadyFlaggedUsers; | ||
|
||
/** | ||
* Creates an instance of this class by using the given config. | ||
* | ||
* @param scamBlockerConfig the scam config | ||
*/ | ||
public SimilarMessagesDetector(ScamBlockerConfig scamBlockerConfig) { | ||
this.scamBlockerConfig = scamBlockerConfig; | ||
this.messageCache = new HashSet<>(); | ||
this.alreadyFlaggedUsers = new HashSet<>(); | ||
} | ||
|
||
private boolean shouldIgnore(Message message) { | ||
if (!message.getAttachments().isEmpty()) { | ||
return false; | ||
} | ||
if (message.getContentRaw().length() <= scamBlockerConfig.getSimilarMessageLengthIgnore()) { | ||
return true; | ||
} | ||
return scamBlockerConfig.getSimilarMessagesWhitelist() | ||
.contains(message.getContentRaw().toLowerCase()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
} | ||
|
||
private MessageInfo addToMessageCache(MessageReceivedEvent event) { | ||
long userId = event.getAuthor().getIdLong(); | ||
long channelId = event.getChannel().getIdLong(); | ||
String messageHash = getHash(event.getMessage()); | ||
Instant timestamp = event.getMessage().getTimeCreated().toInstant(); | ||
MessageInfo messageInfo = new MessageInfo(userId, channelId, messageHash, timestamp); | ||
messageCache.add(messageInfo); | ||
return messageInfo; | ||
Comment on lines
+47
to
+54
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. id still refactor this and move all the stuff dealing with record MessageInfo(...) {
static MessageInfo fromMessageEvent(MessageReceivedEvent event) {...}
} then this the hash creation would then also move into the records There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't quite like adding a unnecessary dependencies to a record, and even less logic like how to create a hash or how to create it from an event imo. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. mh... but its a factory to create instances of this record. so imo this is exactly the place it should be. what do others here think? |
||
} | ||
|
||
private String getHash(Message message) { | ||
String wholeText = message.getContentRaw() + message.getAttachments() | ||
.stream() | ||
.map(Message.Attachment::getFileName) | ||
.collect(Collectors.joining()); | ||
return Hashing.bytesToHex(Hashing.hashUTF8(HASH_METHOD, wholeText)); | ||
} | ||
|
||
private boolean hasPostedTooManySimilarMessages(long userId, String messageHash) { | ||
long similarMessageCount = messageCache.stream() | ||
.filter(m -> m.userId() == userId && m.messageHash().equals(messageHash) | ||
&& !isObsolete(m)) | ||
.count(); | ||
return similarMessageCount > scamBlockerConfig.getMaxAllowedSimilarMessages(); | ||
Zabuzard marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
private boolean isObsolete(MessageInfo messageInfo) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. rename to |
||
return messageInfo.timestamp() | ||
.plus(scamBlockerConfig.getSimilarMessagesWindow(), ChronoUnit.MINUTES) | ||
.isBefore(Instant.now()); | ||
} | ||
|
||
/** | ||
* Stores message data and if many messages of same author, different channel and same content | ||
* is posted several times, returns true. | ||
* | ||
* @param event the message event | ||
* @return true if the user spammed the message in several channels, false otherwise | ||
*/ | ||
public boolean doSimilarMessageCheck(MessageReceivedEvent event) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. could u do some reordering so the flow reads top to bottom. this method would then be at the top, as entry-point. and the private methods then below it in the order they are used. the |
||
long userId = event.getAuthor().getIdLong(); | ||
if (alreadyFlaggedUsers.contains(userId)) { | ||
return true; | ||
} | ||
if (shouldIgnore(event.getMessage())) { | ||
return false; | ||
} | ||
String hash = addToMessageCache(event).messageHash(); | ||
if (hasPostedTooManySimilarMessages(userId, hash)) { | ||
alreadyFlaggedUsers.add(userId); | ||
return true; | ||
} else { | ||
return false; | ||
Comment on lines
+88
to
+99
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i am not sure if that at which point u have like 30 lines of code dealing with what caffeeine gets done in one line, i guess. private final Cache<String, Instant> userIdToAskedAtCache = Caffeine.newBuilder()
.maximumSize(1_000)
.expireAfterWrite(Duration.of(10, ChronoUnit.SECONDS))
.build(); the interface is essentially that of a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. how do i make it a set ? if the cache already handles he cleanup, then storing the instant is pointless. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. caffeine only has a |
||
} | ||
} | ||
|
||
/** | ||
* Has to be called often to clear the cache. | ||
*/ | ||
public void runRoutine() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. rename to |
||
messageCache.removeIf(this::isObsolete); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -57,4 +57,16 @@ public static byte[] hash(String method, byte[] data) { | |
throw new IllegalStateException("Hash method must be supported", e); | ||
} | ||
} | ||
|
||
/** | ||
* Hashes the given UTF 8 text using the given method, see {@link Hashing#hash(String, byte[])}. | ||
* | ||
* @param method the method to use for hashing, must be supported by {@link MessageDigest}, e.g. | ||
* {@code "SHA"} | ||
* @param text the UTF 8 text to hash | ||
* @return the computed hash | ||
*/ | ||
public static byte[] hashUTF8(String method, String text) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. NIT: should probably be renamed into |
||
return hash(method, text.getBytes(StandardCharsets.UTF_8)); | ||
} | ||
} |
Uh oh!
There was an error while loading. Please reload this page.