Commit 0a150a7

solves merge conflicts
2 parents: 953c4e1 + c4c6ec7

68 files changed (+912, -103 lines)


classifiers/__init__.py

Lines changed: 16 additions & 10 deletions
```diff
@@ -1,21 +1,27 @@
 from fastapi import APIRouter
 
 from .llm import (
-    gpt_classifier,
+    gpt_classifier,
     deberta_review_classifier,
     bert_sentiment_german,
-    distilbert_stock_news_classifier
+    distilbert_stock_news_classifier,
 )
 
 from .lookup_lists import lookup_list
 
 from .reference_complexity import (
-    maximum_sentence_complexity
+    maximum_sentence_complexity,
+    tiktoken_length_classifier,
+    chunked_sentence_complexity,
 )
 
+from .question_type import question_type_classifier
+
+from .communication_style import communication_style_classifier
+
 from .reference_quality import (
+    word_count_classifier,
     special_character_classifier,
-    chunked_sentence_complexity,
 )
 
 from .dates_and_times import (
@@ -31,10 +37,6 @@
     cosine_similarity,
 )
 
-from .spelling import (
-    spelling_check,
-)
-
 from .text_analysis import (
     emotionality_detection,
     language_detection,
@@ -63,11 +65,15 @@
     textblob_subjectivity,
     distilbert_stock_news_classifier,
     workday_classifier,
-    deberta_review_classifier,
+    deberta_review_classifier,
     bert_sentiment_german,
+    tiktoken_length_classifier,
+    word_count_classifier,
     special_character_classifier,
     chunked_sentence_complexity,
-    maximum_sentence_complexity
+    maximum_sentence_complexity,
+    question_type_classifier,
+    communication_style_classifier,
 ]:
     module_name = module.__name__.split(".")[-1]
     model_name = (
```
Lines changed: 1 addition & 0 deletions
Uses an `intfloat/multilingual-e5-small` model, which was finetuned on English and German examples of different communication styles. The model is hosted on Kern AI's own infrastructure and classifies text sequences into one of the labels `Action-seeking`, `Fact-oriented`, `Information-seeking` or `Self-revealing`.
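
For reference, a minimal sketch of calling the hosted model directly. It reuses the endpoint, the payload shape, and the `label` response field that the brick code added in this commit relies on; the example text is taken from the brick's `INPUT_EXAMPLE`.

```python
import requests

# Hosted inference endpoint used by the bricks in this commit
REQUEST_URL = "https://free.api.kern.ai/inference"

payload = {
    "model_name": "KernAI/multilingual-e5-communication-style",
    "text": "Change the number in row 2 and 3.",
}

response = requests.post(REQUEST_URL, json=payload)
response.raise_for_status()  # surface HTTP errors instead of continuing silently
# Expected labels: Action-seeking, Fact-oriented, Information-seeking, Self-revealing
print(response.json()["label"])
```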
Lines changed: 27 additions & 0 deletions
```python
from pydantic import BaseModel
import requests

INPUT_EXAMPLE = {
    "text": "Change the number in row 2 and 3.",
    "model_name": "KernAI/multilingual-e5-communication-style",
}


class CommunicationStyleClassifierModel(BaseModel):
    text: str
    model_name: str

    class Config:
        schema_extra = {"example": INPUT_EXAMPLE}


def communication_style_classifier(req: CommunicationStyleClassifierModel):
    """Uses custom E5 model to classify communication style of a text"""
    payload = {
        "model_name": req.model_name,
        "text": req.text
    }
    response = requests.post("https://free.api.kern.ai/inference", json=payload)
    if response.ok:
        return {"communication_style": response.json()["label"]}
    return response.raise_for_status()
```
Lines changed: 34 additions & 0 deletions
```python
import requests

def communication_style_classifier(text: str, model_name: str, request_url: str = "https://free.api.kern.ai/inference") -> str:
    """
    @param text: text with a user query you want to classify
    @param model_name: name of a model provided by Kern AI
    @param request_url: URL to the API endpoint of Kern AI
    @return: returns either 'Action-seeking', 'Fact-oriented', 'Information-seeking' or 'Self-revealing'
    """
    payload = {
        "model_name": model_name,
        "text": text
    }
    response = requests.post(request_url, json=payload)
    if response.ok:
        return response.json()["label"]
    return response.raise_for_status()


# ↑ necessary bricks function
# -----------------------------------------------------------------------------------------
# ↓ example implementation


model_name = "KernAI/multilingual-e5-communication-style"

def example_integration():
    texts = ["Change the number in row 2 and 3.", "Can you show me some data from the references?", "I am super happy today."]
    for text in texts:
        print(f"the communication style of \"{text}\" is \"{communication_style_classifier(text, model_name=model_name)}\"")

example_integration()
```
Lines changed: 16 additions & 0 deletions
```python
import requests

ATTRIBUTE: str = "text"  # only text attributes
MODEL_NAME: str = "KernAI/multilingual-e5-communication-style"
REQUEST_URL: str = "https://free.api.kern.ai/inference"

def communication_style_classifier(record):
    payload = {
        "model_name": MODEL_NAME,
        "text": record[ATTRIBUTE].text
    }
    response = requests.post(REQUEST_URL, json=payload)
    if response.ok:
        return response.json()["label"]
```
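
A note on the `record[ATTRIBUTE].text` access in the refinery snippet above: the attribute is read via `.text`, which implies the record delivers a tokenized object (e.g. a spaCy doc) rather than a plain string. Outside refinery, the call can be exercised with a small stand-in; `_FakeDoc` below is a hypothetical helper for illustration only.

```python
import requests

ATTRIBUTE = "text"
MODEL_NAME = "KernAI/multilingual-e5-communication-style"
REQUEST_URL = "https://free.api.kern.ai/inference"


class _FakeDoc:
    """Hypothetical stand-in for the tokenized attribute object a record provides."""

    def __init__(self, text: str):
        self.text = text


record = {ATTRIBUTE: _FakeDoc("I am super happy today.")}

payload = {"model_name": MODEL_NAME, "text": record[ATTRIBUTE].text}
response = requests.post(REQUEST_URL, json=payload)
if response.ok:
    print(response.json()["label"])
```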
Lines changed: 48 additions & 0 deletions
```python
from util.configs import build_classifier_function_config
from util.enums import State, RefineryDataType, BricksVariableType, SelectionType
from . import communication_style_classifier, INPUT_EXAMPLE


def get_config():
    return build_classifier_function_config(
        function=communication_style_classifier,
        input_example=INPUT_EXAMPLE,
        issue_id=343,
        tabler_icon="CircleDotted",
        min_refinery_version="1.7.0",
        state=State.PUBLIC.value,
        type="python_function",
        available_for=["refinery", "common"],
        part_of_group=[
            "communication_style"
        ],  # first entry should be parent directory
        # bricks integrator information
        integrator_inputs={
            "name": "communication_style_classifier",
            "refineryDataType": RefineryDataType.TEXT.value,
            "outputs": ["Action-seeking", "Fact-oriented", "Information-seeking", "Self-revealing"],
            "variables": {
                "ATTRIBUTE": {
                    "selectionType": SelectionType.CHOICE.value,
                    "addInfo": [
                        BricksVariableType.ATTRIBUTE.value,
                        BricksVariableType.GENERIC_STRING.value
                    ]
                },
                "MODEL_NAME": {
                    "selectionType": SelectionType.STRING.value,
                    "defaultValue": "KernAI/multilingual-e5-communication-style",
                    "addInfo": [
                        BricksVariableType.GENERIC_STRING.value
                    ]
                },
                "REQUEST_URL": {
                    "selectionType": SelectionType.STRING.value,
                    "defaultValue": "https://free.api.kern.ai/inference",
                    "addInfo": [
                        BricksVariableType.GENERIC_STRING.value
                    ]
                }
            }
        }
    )
```
Lines changed: 1 addition & 0 deletions
Uses an `intfloat/multilingual-e5-small` model, which was finetuned on English and German examples of different question types. The model is hosted on Kern AI's own infrastructure and classifies text sequences into one of the labels `Keyword-question`, `Statement-question` or `Interrogative-question`.
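
As above, a minimal sketch of querying the hosted question-type model directly, using the same endpoint, payload shape, and `label` response field as the brick code in this commit; the example text is the brick's `INPUT_EXAMPLE`.

```python
import requests

# Same hosted inference endpoint as the communication-style brick
REQUEST_URL = "https://free.api.kern.ai/inference"

payload = {
    "model_name": "KernAI/multilingual-e5-question-type",
    "text": "Sushi restaurants Barcelona",
}

response = requests.post(REQUEST_URL, json=payload)
response.raise_for_status()
# Expected labels: Keyword-question, Statement-question, Interrogative-question
print(response.json()["label"])
```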
Lines changed: 27 additions & 0 deletions
```python
from pydantic import BaseModel
import requests

INPUT_EXAMPLE = {
    "text": "Sushi restaurants Barcelona",
    "model_name": "KernAI/multilingual-e5-question-type",
}


class QuestionTypeClassifierModel(BaseModel):
    text: str
    model_name: str

    class Config:
        schema_extra = {"example": INPUT_EXAMPLE}


def question_type_classifier(req: QuestionTypeClassifierModel):
    """Uses custom E5 model to classify the question type of a text"""
    payload = {
        "model_name": req.model_name,
        "text": req.text
    }
    response = requests.post("https://free.api.kern.ai/inference", json=payload)
    if response.ok:
        return {"question_type": response.json()["label"]}
    return response.raise_for_status()
```
Lines changed: 34 additions & 0 deletions
```python
import requests

def question_type_classifier(text: str, model_name: str, request_url: str = "https://free.api.kern.ai/inference") -> str:
    """
    @param text: text with a user question you want to classify
    @param model_name: name of a model provided by Kern AI
    @param request_url: URL to the API endpoint of Kern AI
    @return: returns either 'Keyword-question', 'Interrogative-question' or 'Statement-question'
    """
    payload = {
        "model_name": model_name,
        "text": text
    }
    response = requests.post(request_url, json=payload)
    if response.ok:
        return response.json()["label"]
    return response.raise_for_status()


# ↑ necessary bricks function
# -----------------------------------------------------------------------------------------
# ↓ example implementation


model_name = "KernAI/multilingual-e5-question-type"

def example_integration():
    texts = ["Travel documents Germany", "Give me documents related to travel insurance.", "What is the content of these documents about?"]
    for text in texts:
        print(f"the question type of \"{text}\" is \"{question_type_classifier(text, model_name=model_name)}\"")

example_integration()
```
Lines changed: 16 additions & 0 deletions
```python
import requests

ATTRIBUTE: str = "text"  # only text attributes
MODEL_NAME: str = "KernAI/multilingual-e5-question-type"
REQUEST_URL: str = "https://free.api.kern.ai/inference"

def question_type_classifier(record):
    payload = {
        "model_name": MODEL_NAME,
        "text": record[ATTRIBUTE].text
    }
    response = requests.post(REQUEST_URL, json=payload)
    if response.ok:
        return response.json()["label"]
```
