Skip to content

Commit 1ed99e3

Browse files
committed
Add automatic checking for profanity
This adds functionality to automatically check for profanity in text messages written in any of the XMPP MUC rooms monitored by the moderation bot. The terms considered profanity can be configured using the database and are language-specific. They have to be stored in their lemmatized form. If a supported language gets detected with an accuracy of 100%, only terms for that language will be checked; otherwise, English terms will be checked as well. The languages supported for now are English, French, German, Polish, Portuguese, Russian, Spanish and Turkish. The first two times a user uses profanity within a sliding window of three months, they'll receive a warning. Starting from the third time, the user will get muted. At first, users will be muted for five minutes; for each continued use of profanity afterwards, the mute duration increases exponentially, up to a maximum of one week. To enable this functionality, the `--enable-profanity-monitoring` command-line option has to be provided.
1 parent 64d6cf2 commit 1ed99e3

File tree

3 files changed

+273
-27
lines changed

3 files changed

+273
-27
lines changed

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ dependencies = [
2020
"cachetools",
2121
"defusedxml",
2222
"dateparser",
23+
"simplemma[marisa-trie]>=1.1.1",
2324
"slixmpp>=1.8.0",
2425
"sqlalchemy>=2.0.4",
2526
]
@@ -87,5 +88,5 @@ max-doc-length = 72
8788
convention = "pep257"
8889

8990
[tool.ruff.lint.pylint]
90-
max-args = 8
91+
max-args = 10
9192
max-nested-blocks = 4

xpartamupp/lobby_moderation_db.py

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
from typing import Any, ClassVar
2525

2626
from sqlalchemy import (
27+
JSON,
2728
DateTime,
2829
ForeignKey,
2930
String,
@@ -69,20 +70,13 @@ class Base(DeclarativeBase):
6970
}
7071

7172

72-
class Blacklist(Base):
73+
class ProfanityTerms(Base):
7374
"""Model for profanity terms."""
7475

75-
__tablename__ = "profanity_blacklist"
76+
__tablename__ = "profanity_terms"
7677

77-
word: Mapped[str] = mapped_column(String(255), primary_key=True)
78-
79-
80-
class Whitelist(Base):
81-
"""Model for terms which are whitelisted from profanity."""
82-
83-
__tablename__ = "profanity_whitelist"
84-
85-
word: Mapped[str] = mapped_column(String(255), primary_key=True)
78+
term: Mapped[str] = mapped_column(String(255), primary_key=True)
79+
language: Mapped[str] = mapped_column(String(2), primary_key=True)
8680

8781

8882
class ProfanityIncident(Base):
@@ -91,10 +85,12 @@ class ProfanityIncident(Base):
9185
__tablename__ = "profanity_incidents"
9286

9387
id: Mapped[int] = mapped_column(primary_key=True)
94-
timestamp: Mapped[datetime]
88+
timestamp: Mapped[datetime] = mapped_column(default=partial(datetime.now, tz=UTC))
9589
player: Mapped[str] = mapped_column(String(255))
90+
room: Mapped[str] = mapped_column(String(255))
9691
offending_content: Mapped[str] = mapped_column(UnicodeText)
97-
deleted: Mapped[bool]
92+
detected_languages: Mapped[list[str]] = mapped_column(JSON)
93+
matched_terms: Mapped[list[str]] = mapped_column(JSON)
9894

9995

10096
class JIDNickWhitelist(Base):

0 commit comments

Comments
 (0)