Skip to content

Commit 4a0525f

Browse files
committed
Add .pre-commit-config.yaml, apply the linter and formatter to the code, and add a GitHub linter workflow on push to main
1 parent d0fb56a commit 4a0525f

17 files changed

+534
-355
lines changed

.github/workflows/ruff.yml

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
name: Ruff Linting
2+
3+
on:
4+
push:
5+
branches: [ main, develop ]
6+
pull_request:
7+
branches: [ main ]
8+
workflow_dispatch:
9+
10+
jobs:
11+
ruff:
12+
runs-on: ubuntu-latest
13+
name: Ruff Check
14+
15+
steps:
16+
- name: Checkout code
17+
uses: actions/checkout@v4
18+
19+
- name: Set up Python
20+
uses: actions/setup-python@v5
21+
with:
22+
python-version: "3.10"
23+
24+
- name: Install dependencies
25+
run: |
26+
python -m pip install --upgrade pip
27+
pip install ruff
28+
pip install .
29+
30+
- name: Run Ruff Check
31+
run: ruff check --output-format=github .
32+
33+
- name: Run Ruff Format Check
34+
run: ruff format --check .

.pre-commit-config.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
repos:
2+
- repo: https://github.com/astral-sh/ruff-pre-commit
3+
rev: v0.12.3
4+
hooks:
5+
- id: ruff
6+
args: [ --fix ]
7+
- id: ruff-format

setup.cfg

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
[tool:ruff]
2+
line-length = 88
3+
target-version = py310
4+
exclude =
5+
.bzr,
6+
.direnv,
7+
.eggs,
8+
.git,
9+
.git-rewrite,
10+
.hg,
11+
.mypy_cache,
12+
.nox,
13+
.pants.d,
14+
.pyenv,
15+
.pytest_cache,
16+
.pytype,
17+
.ruff_cache,
18+
.svn,
19+
.tox,
20+
.venv,
21+
.vscode,
22+
__pypackages__,
23+
_build,
24+
buck-out,
25+
build,
26+
dist,
27+
node_modules,
28+
site-packages,
29+
venv
30+
31+
[tool:ruff.lint]
32+
select = E4,E7,E9,F,I,B,C4,W
33+
ignore = E501,B008,B904
34+
fixable = ALL
35+
unfixable =
36+
dummy-variable-rgx = ^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$
37+
38+
[tool:ruff.format]
39+
quote-style = double
40+
indent-style = space
41+
skip-magic-trailing-comma = false
42+
line-ending = auto
43+
docstring-code-format = false
44+
docstring-code-line-length = dynamic
45+
46+
[tool:ruff.lint.isort]
47+
known-first-party = UnionChatBot

setup.py

Lines changed: 36 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -3,53 +3,53 @@
33
__version__ = "0.1.0"
44

55
# Read requirements from requirements.txt
6-
with open('requirements.txt') as f:
7-
requirements = [line.strip() for line in f if line.strip() and not line.startswith('#')]
6+
with open("requirements.txt") as f:
7+
requirements = [
8+
line.strip() for line in f if line.strip() and not line.startswith("#")
9+
]
810

911
# Read long description from README.md
10-
with open('README.md', 'r', encoding='utf-8') as f:
12+
with open("README.md", "r", encoding="utf-8") as f:
1113
long_description = f.read()
1214

1315
setup(
14-
name='UnionChatBot',
16+
name="UnionChatBot",
1517
version=__version__,
16-
python_requires='>=3.10.0',
17-
url='https://github.com/GishB/GeneralPurposeTelegramBOT',
18-
license='MIT License',
19-
author='Aleksandr Samofalov',
20-
author_email='SamofalovWORK@yandex.ru',
21-
description='TelegramBot for general questions related to documents via Yandex API',
18+
python_requires=">=3.10.0",
19+
url="https://github.com/GishB/GeneralPurposeTelegramBOT",
20+
license="MIT License",
21+
author="Aleksandr Samofalov",
22+
author_email="SamofalovWORK@yandex.ru",
23+
description="TelegramBot for general questions related to documents via Yandex API",
2224
long_description=long_description,
23-
long_description_content_type='text/markdown',
24-
package_dir={'': 'src'},
25-
packages=find_packages(where="src", exclude=[
26-
'tests*',
27-
'experiments*',
28-
'docs*',
29-
'.*',
30-
'*.egg-info',
31-
'build*',
32-
'dist*'
33-
]),
25+
long_description_content_type="text/markdown",
26+
package_dir={"": "src"},
27+
packages=find_packages(
28+
where="src",
29+
exclude=[
30+
"tests*",
31+
"experiments*",
32+
"docs*",
33+
".*",
34+
"*.egg-info",
35+
"build*",
36+
"dist*",
37+
],
38+
),
3439
install_requires=requirements,
3540
include_package_data=True,
3641
zip_safe=False,
3742
classifiers=[
38-
'Development Status :: Alpha',
39-
'Intended Audience :: Develop',
40-
'License :: OSI Approved :: MIT License',
41-
'Programming Language :: Python :: 3.10',
42-
'Topic :: Develop/Engineering :: Text Information Analysis',
43-
'Operating System :: OS Ubuntu 2022 TLS',
43+
"Development Status :: Alpha",
44+
"Intended Audience :: Develop",
45+
"License :: OSI Approved :: MIT License",
46+
"Programming Language :: Python :: 3.10",
47+
"Topic :: Develop/Engineering :: Text Information Analysis",
48+
"Operating System :: OS Ubuntu 2022 TLS",
4449
],
4550
project_urls={
46-
'Bug Reports': 'https://github.com/GishB/GeneralPurposeTelegramBOT/issues',
47-
'Source': 'https://github.com/GishB/GeneralPurposeTelegramBOT',
51+
"Bug Reports": "https://github.com/GishB/GeneralPurposeTelegramBOT/issues",
52+
"Source": "https://github.com/GishB/GeneralPurposeTelegramBOT",
4853
},
49-
keywords=[
50-
'YandexGPT API',
51-
'LLM',
52-
'RAG',
53-
'TelegramBot'
54-
],
55-
)
54+
keywords=["YandexGPT API", "LLM", "RAG", "TelegramBot"],
55+
)

src/UnionChatBot/utils/ChatHistoryManager.py

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,16 @@
44
from datetime import datetime, timedelta
55
from typing import Optional
66

7+
78
class ChatHistoryManager:
8-
def __init__(self,
9-
redis_host: str = 'localhost',
10-
redis_port: int = 6379,
11-
redis_db: int = 1,
12-
history_ttl_days: Optional[int] = None,
13-
max_history_length: Optional[int] = None):
9+
def __init__(
10+
self,
11+
redis_host: str = "localhost",
12+
redis_port: int = 6379,
13+
redis_db: int = 1,
14+
history_ttl_days: Optional[int] = None,
15+
max_history_length: Optional[int] = None,
16+
):
1417
"""
1518
Инициализация менеджера истории чата.
1619
@@ -19,13 +22,21 @@ def __init__(self,
1922
redis_port: порт Redis
2023
redis_db: номер базы данных Redis
2124
"""
22-
self.max_history_length = max_history_length if max_history_length else os.environ["MAX_HISTORY_USER_LENGTH"]
23-
self.history_ttl_days = history_ttl_days if history_ttl_days else os.environ["HISTORY_USER_TTL_DAYS"]
25+
self.max_history_length = (
26+
max_history_length
27+
if max_history_length
28+
else os.environ["MAX_HISTORY_USER_LENGTH"]
29+
)
30+
self.history_ttl_days = (
31+
history_ttl_days
32+
if history_ttl_days
33+
else os.environ["HISTORY_USER_TTL_DAYS"]
34+
)
2435
self.redis_client = redis.StrictRedis(
2536
host=redis_host,
2637
port=redis_port,
2738
db=redis_db,
28-
decode_responses=True # Автоматически декодируем из bytes в str
39+
decode_responses=True, # Автоматически декодируем из bytes в str
2940
)
3041
self.history_ttl_days = history_ttl_days
3142

@@ -61,7 +72,7 @@ def add_message_to_history(self, user_id: str, message: str) -> None:
6172
history_key = self._get_history_key(user_id)
6273

6374
# Добавляем сообщение с временной меткой
64-
timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
75+
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
6576
formatted_message = f"[{timestamp}] {message}"
6677

6778
# Добавляем в начало списка и ограничиваем длину
@@ -100,4 +111,4 @@ def clear_user_history(self, user_id: str) -> None:
100111
user_id: идентификатор пользователя
101112
"""
102113
history_key = self._get_history_key(user_id)
103-
self.redis_client.delete(history_key)
114+
self.redis_client.delete(history_key)

src/UnionChatBot/utils/ChromaAdapter.py

Lines changed: 35 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -6,20 +6,21 @@
66
from UnionChatBot.utils.EmbeddingAPI import MyEmbeddingFunction
77
from UnionChatBot.utils.RerankerAPI import BM25Reranker
88

9+
910
class ChromaAdapter:
1011
def __init__(
11-
self,
12-
host: str = "localhost",
13-
port: int = 32000,
14-
max_rag_documents: int = 20,
15-
topk_documents: int = 3,
16-
similarity_filter: float = 1.5,
17-
embedding_model: str = "all-MiniLM-L6-v2",
18-
reranker_type: str = "bm25",
19-
api_key: Optional[str] = None,
20-
folder_id: Optional[str] = None,
21-
text_type: str = "doc",
22-
api_url: Optional[str] = None
12+
self,
13+
host: str = "localhost",
14+
port: int = 32000,
15+
max_rag_documents: int = 20,
16+
topk_documents: int = 3,
17+
similarity_filter: float = 1.5,
18+
embedding_model: str = "all-MiniLM-L6-v2",
19+
reranker_type: str = "bm25",
20+
api_key: Optional[str] = None,
21+
folder_id: Optional[str] = None,
22+
text_type: str = "doc",
23+
api_url: Optional[str] = None,
2324
):
2425
self.reranker_type = reranker_type
2526
if reranker_type == "bm25":
@@ -48,29 +49,26 @@ def embedding_function(self):
4849
api_url=self.api_url,
4950
folder_id=self.folder_id,
5051
iam_token=self.api_key,
51-
text_type=self.text_type
52+
text_type=self.text_type,
5253
)
5354
return self._embedding_function
5455

5556
def get_info_from_db(
56-
self,
57-
query: str,
58-
collection_name: str,
59-
n_results: int = 30,
60-
**kwargs
57+
self, query: str, collection_name: str, n_results: int = 30, **kwargs
6158
) -> Dict[str, Any]:
6259
collection = self.client.get_collection(
63-
name=collection_name,
64-
embedding_function=self.embedding_function
60+
name=collection_name, embedding_function=self.embedding_function
6561
)
6662
return collection.query(
6763
query_texts=[query],
6864
n_results=n_results,
69-
include=["documents", "metadatas", "distances"]
65+
include=["documents", "metadatas", "distances"],
7066
)
7167

7268
def get_filtered_documents(self, data_raw: Dict[str, Any]) -> dict:
73-
distances = data_raw["distances"][0] # Берем первый элемент, так как query_texts=[query]
69+
distances = data_raw["distances"][
70+
0
71+
] # Берем первый элемент, так как query_texts=[query]
7472
documents = data_raw["documents"][0]
7573
metadatas = data_raw["metadatas"][0]
7674

@@ -84,7 +82,8 @@ def get_filtered_documents(self, data_raw: Dict[str, Any]) -> dict:
8482
metadatas[idx]
8583
for idx, dist in enumerate(distances)
8684
if dist < self.similarity_filter
87-
]}
85+
],
86+
}
8887

8988
def get_pairs(self, query: str, documents: List[str]) -> List[List[str]]:
9089
return [[query, doc] for doc in documents]
@@ -100,7 +99,7 @@ def get_info(self, query: str, collection_name: str) -> dict[str, list[Any] | st
10099
data_raw = self.get_info_from_db(
101100
query=query,
102101
collection_name=collection_name,
103-
n_results=self.max_rag_documents
102+
n_results=self.max_rag_documents,
104103
)
105104
filtered_documents = self.get_filtered_documents(data_raw)
106105

@@ -109,13 +108,19 @@ def get_info(self, query: str, collection_name: str) -> dict[str, list[Any] | st
109108
"documents": [],
110109
"metadatas": [],
111110
"query": query,
112-
"collection_name": collection_name
111+
"collection_name": collection_name,
113112
}
114113

115-
idx_relevant_documents = self.apply_reranker(query=query, documents=filtered_documents["documents"])
114+
idx_relevant_documents = self.apply_reranker(
115+
query=query, documents=filtered_documents["documents"]
116+
)
116117
return {
117-
"documents": [filtered_documents["documents"][idx] for idx in idx_relevant_documents],
118-
"metadatas": [filtered_documents["metadatas"][idx] for idx in idx_relevant_documents],
118+
"documents": [
119+
filtered_documents["documents"][idx] for idx in idx_relevant_documents
120+
],
121+
"metadatas": [
122+
filtered_documents["metadatas"][idx] for idx in idx_relevant_documents
123+
],
119124
"query": query,
120-
"collection_name": collection_name
121-
}
125+
"collection_name": collection_name,
126+
}

0 commit comments

Comments
 (0)