Skip to content

Commit 67f9c6e

Browse files
Merge remote-tracking branch 'origin/main' into location-extraction
2 parents 7cea295 + 043a08c commit 67f9c6e

File tree

23 files changed

+716
-314
lines changed

23 files changed

+716
-314
lines changed

classifiers/__init__.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,20 @@
99

1010
from .lookup_lists import lookup_list
1111

12+
from .reference_complexity import (
13+
chunked_sentence_complexity,
14+
15+
from .question_type import (
16+
question_type_classifier
17+
)
18+
19+
from .communication_style import (
20+
communication_style_classifier
21+
)
22+
1223
from .reference_quality import (
24+
word_count_classifier,
1325
special_character_classifier,
14-
chunked_sentence_complexity,
1526
)
1627

1728
from .dates_and_times import (
@@ -61,8 +72,12 @@
6172
workday_classifier,
6273
deberta_review_classifier,
6374
bert_sentiment_german,
75+
word_count_classifier,
76+
chunked_sentence_complexity,
6477
special_character_classifier,
65-
chunked_sentence_complexity
78+
chunked_sentence_complexity,
79+
question_type_classifier,
80+
communication_style_classifier
6681
]:
6782
module_name = module.__name__.split(".")[-1]
6883
model_name = (
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Uses an `intfloat/multilingual-e5-small` model, which was finetuned on English and German examples of different communication styles. The model is hosted on Kern AI's own infrastructure and is meant to be used to classify text sequences by the labels `action-seeking`, `fact-oriented`, `information-seeking` or `self-revealing`.
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
from pydantic import BaseModel
import requests

INPUT_EXAMPLE = {
    "text": "Change the number in row 2 and 3.",
    "model_name": "KernAI/multilingual-e5-communication-style",
}

# Kern AI hosted inference endpoint used by this brick.
REQUEST_URL = "https://free.api.kern.ai/inference"

# Seconds to wait for the remote model; without a timeout, requests.post can
# block indefinitely if the service stalls.
REQUEST_TIMEOUT = 30


class CommunicationStyleClassifierModel(BaseModel):
    # Text sequence to classify.
    text: str
    # Name of a model hosted by Kern AI, e.g. the INPUT_EXAMPLE value.
    model_name: str

    class Config:
        schema_extra = {"example": INPUT_EXAMPLE}


def communication_style_classifier(req: CommunicationStyleClassifierModel):
    """Uses custom E5 model to classify communication style of a text.

    Returns {"communication_style": <label>} on success. Raises
    requests.HTTPError for a non-OK status and requests.Timeout if the
    endpoint does not answer within REQUEST_TIMEOUT seconds.
    """
    payload = {
        "model_name": req.model_name,
        "text": req.text,
    }
    response = requests.post(REQUEST_URL, json=payload, timeout=REQUEST_TIMEOUT)
    if response.ok:
        return {"communication_style": response.json()["label"]}
    return response.raise_for_status()
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
```python
2+
import requests

def communication_style_classifier(text: str, model_name: str, request_url: str = "https://free.api.kern.ai/inference", timeout: float = 30.0) -> str:
    """
    @param text: text with a user query you want to classify
    @param model_name: Name of a model provided by Kern AI
    @param request_url: URL to the API endpoint of Kern AI
    @param timeout: seconds to wait for the endpoint before raising requests.Timeout
    @return: returns either 'action-seeking', 'fact-oriented', 'information-seeking' or 'self-revealing'.
    """
    payload = {
        "model_name": model_name,
        "text": text
    }
    # Without a timeout, requests.post can hang indefinitely if the service stalls.
    response = requests.post(request_url, json=payload, timeout=timeout)
    if response.ok:
        return response.json()["label"]
    # Non-OK status: raise requests.HTTPError with the status details.
    return response.raise_for_status()


# ↑ necessary bricks function
# -----------------------------------------------------------------------------------------
# ↓ example implementation


model_name = "KernAI/multilingual-e5-communication-style"

def example_integration():
    texts = ["Change the number in row 2 and 3.", "Can you show me some data from the references?", "I am super happy today."]
    for text in texts:
        print(f"the communication style of \"{text}\" is \"{communication_style_classifier(text, model_name=model_name)}\"")

example_integration()
34+
```
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
```python
2+
import requests

ATTRIBUTE: str = "text" # only text attributes
MODEL_NAME: str = "KernAI/multilingual-e5-communication-style"
REQUEST_URL: str = "https://free.api.kern.ai/inference"
REQUEST_TIMEOUT: int = 30  # seconds; keeps a stalled endpoint from blocking the labeling run

def communication_style_classifier(record):
    """Classify the communication style of one refinery record; returns None if the call fails."""
    payload = {
        "model_name": MODEL_NAME,
        # NOTE(review): record[ATTRIBUTE] appears to expose the raw string via
        # .text (spaCy-like object) — confirm against the refinery record API.
        "text": record[ATTRIBUTE].text
    }
    response = requests.post(REQUEST_URL, json=payload, timeout=REQUEST_TIMEOUT)
    if response.ok:
        return response.json()["label"]
```
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
from util.configs import build_classifier_function_config
2+
from util.enums import State, RefineryDataType, BricksVariableType, SelectionType
3+
from . import communication_style_classifier, INPUT_EXAMPLE
4+
5+
6+
def get_config():
    """Bricks metadata for the communication_style classifier brick."""
    # Variables surfaced to the user in the bricks integrator UI.
    variables = {
        "ATTRIBUTE": {
            "selectionType": SelectionType.CHOICE.value,
            "addInfo": [
                BricksVariableType.ATTRIBUTE.value,
                BricksVariableType.GENERIC_STRING.value,
            ],
        },
        "MODEL_NAME": {
            "selectionType": SelectionType.STRING.value,
            "defaultValue": "KernAI/multilingual-e5-communication-style",
            "addInfo": [BricksVariableType.GENERIC_STRING.value],
        },
        "REQUEST_URL": {
            "selectionType": SelectionType.STRING.value,
            "defaultValue": "https://free.api.kern.ai/inference",
            "addInfo": [BricksVariableType.GENERIC_STRING.value],
        },
    }
    return build_classifier_function_config(
        function=communication_style_classifier,
        input_example=INPUT_EXAMPLE,
        issue_id=343,
        tabler_icon="CircleDotted",
        min_refinery_version="1.7.0",
        state=State.PUBLIC.value,
        type="python_function",
        available_for=["refinery", "common"],
        # first entry should be parent directory
        part_of_group=["communication_style"],
        # bricks integrator information
        integrator_inputs={
            "name": "communication_style_classifier",
            "refineryDataType": RefineryDataType.TEXT.value,
            "outputs": [
                "action-seeking",
                "fact-oriented",
                "information-seeking",
                "self-revealing",
            ],
            "variables": variables,
        },
    )
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Uses an `intfloat/multilingual-e5-small` model, which was finetuned on English and German examples of different question types. The model is hosted on Kern AI's own infrastructure and is meant to be used to classify text sequences by the labels `keyword-question`, `statement-question` or `interrogative-question`.
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
from pydantic import BaseModel
import requests

INPUT_EXAMPLE = {
    "text": "Sushi restaurants Barcelona",
    "model_name": "KernAI/multilingual-e5-question-type",
}

# Kern AI hosted inference endpoint used by this brick.
REQUEST_URL = "https://free.api.kern.ai/inference"

# Seconds to wait for the remote model; without a timeout, requests.post can
# block indefinitely if the service stalls.
REQUEST_TIMEOUT = 30


class QuestionTypeClassifierModel(BaseModel):
    # Text sequence to classify.
    text: str
    # Name of a model hosted by Kern AI, e.g. the INPUT_EXAMPLE value.
    model_name: str

    class Config:
        schema_extra = {"example": INPUT_EXAMPLE}


def question_type_classifier(req: QuestionTypeClassifierModel):
    """Uses custom E5 model to classify the question type of a text.

    Returns {"question_type": <label>} on success. Raises requests.HTTPError
    for a non-OK status and requests.Timeout if the endpoint does not answer
    within REQUEST_TIMEOUT seconds.
    """
    payload = {
        "model_name": req.model_name,
        "text": req.text,
    }
    response = requests.post(REQUEST_URL, json=payload, timeout=REQUEST_TIMEOUT)
    if response.ok:
        return {"question_type": response.json()["label"]}
    return response.raise_for_status()
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
```python
2+
import requests

def question_type_classifier(text: str, model_name: str, request_url: str = "https://free.api.kern.ai/inference", timeout: float = 30.0) -> str:
    """
    @param text: text with a user query you want to classify
    @param model_name: Name of a model provided by Kern AI
    @param request_url: URL to the API endpoint of Kern AI
    @param timeout: seconds to wait for the endpoint before raising requests.Timeout
    @return: returns either 'keyword-question', 'interrogative-question' or 'statement-question'
    """
    payload = {
        "model_name": model_name,
        "text": text
    }
    # Without a timeout, requests.post can hang indefinitely if the service stalls.
    response = requests.post(request_url, json=payload, timeout=timeout)
    if response.ok:
        return response.json()["label"]
    # Non-OK status: raise requests.HTTPError with the status details.
    return response.raise_for_status()


# ↑ necessary bricks function
# -----------------------------------------------------------------------------------------
# ↓ example implementation


model_name = "KernAI/multilingual-e5-question-type"

def example_integration():
    texts = ["Travel documents Germany", "Give me documents related to travel insurance.", "What is the content of these documents about?"]
    for text in texts:
        print(f"the question type of \"{text}\" is \"{question_type_classifier(text, model_name=model_name)}\"")

example_integration()
34+
```
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
```python
2+
import requests

ATTRIBUTE: str = "text" # only text attributes
MODEL_NAME: str = "KernAI/multilingual-e5-question-type"
REQUEST_URL: str = "https://free.api.kern.ai/inference"
REQUEST_TIMEOUT: int = 30  # seconds; keeps a stalled endpoint from blocking the labeling run

def question_type_classifier(record):
    """Classify the question type of one refinery record; returns None if the call fails."""
    payload = {
        "model_name": MODEL_NAME,
        # NOTE(review): record[ATTRIBUTE] appears to expose the raw string via
        # .text (spaCy-like object) — confirm against the refinery record API.
        "text": record[ATTRIBUTE].text
    }
    response = requests.post(REQUEST_URL, json=payload, timeout=REQUEST_TIMEOUT)
    if response.ok:
        return response.json()["label"]
16+
```
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
from util.configs import build_classifier_function_config
2+
from util.enums import State, RefineryDataType, BricksVariableType, SelectionType
3+
from . import question_type_classifier, INPUT_EXAMPLE
4+
5+
6+
def get_config():
    """Bricks metadata for the question_type classifier brick."""
    # Variables surfaced to the user in the bricks integrator UI.
    variables = {
        "ATTRIBUTE": {
            "selectionType": SelectionType.CHOICE.value,
            "addInfo": [
                BricksVariableType.ATTRIBUTE.value,
                BricksVariableType.GENERIC_STRING.value,
            ],
        },
        "MODEL_NAME": {
            "selectionType": SelectionType.STRING.value,
            "defaultValue": "KernAI/multilingual-e5-question-type",
            "addInfo": [BricksVariableType.GENERIC_STRING.value],
        },
        "REQUEST_URL": {
            "selectionType": SelectionType.STRING.value,
            "defaultValue": "https://free.api.kern.ai/inference",
            "addInfo": [BricksVariableType.GENERIC_STRING.value],
        },
    }
    return build_classifier_function_config(
        function=question_type_classifier,
        input_example=INPUT_EXAMPLE,
        issue_id=344,
        tabler_icon="ZoomQuestion",
        min_refinery_version="1.7.0",
        state=State.PUBLIC.value,
        type="python_function",
        available_for=["refinery", "common"],
        # first entry should be parent directory
        part_of_group=["question_type"],
        # bricks integrator information
        integrator_inputs={
            "name": "question_type_classifier",
            "refineryDataType": RefineryDataType.TEXT.value,
            "outputs": [
                "keyword-question",
                "statement-question",
                "interrogative-question",
            ],
            "variables": variables,
        },
    )

classifiers/reference_quality/chunked_sentence_complexity/config.py renamed to classifiers/reference_complexity/chunked_sentence_complexity/config.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,18 +14,18 @@ def get_config():
1414
type="python_function",
1515
available_for=["refinery", "common"],
1616
part_of_group=[
17-
"reference_quality",
17+
"reference_complexity",
1818
"text_analysis"
1919
], # first entry should be parent directory
2020
# bricks integrator information
2121
cognition_init_mapping={
22-
"very easy": "null",
23-
"easy": "null",
24-
"fairly easy": "null",
25-
"standard": "null",
26-
"fairly difficult": "Needs fix",
27-
"difficult": "Needs fix",
28-
"very difficult": "Needs fix",
22+
"very easy": "Low",
23+
"easy": "Low",
24+
"fairly easy": "Low",
25+
"standard": "Medium",
26+
"fairly difficult": "Medium",
27+
"difficult": "High",
28+
"very difficult": "High",
2929
},
3030
integrator_inputs={
3131
"name": "chunked_sentence_complexity",
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
This simple brick counts the number of words in a string by splitting it. If fewer than five words are found, it returns "short". If there are fewer than 20, it returns "medium",
and with 20 or more, it returns "long". This brick can be used to check the quality of references for RAG (Retrieval Augmented Generation) use cases.
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
from pydantic import BaseModel

INPUT_EXAMPLE = {"text": "This is too short!"}


class WordCountClassifierModel(BaseModel):
    # Text whose word count is evaluated.
    text: str

    class Config:
        schema_extra = {"example": INPUT_EXAMPLE}


def word_count_classifier(req: WordCountClassifierModel):
    """Checks the length of a string by counting the number of words in it"""
    n_words = len(req.text.split())
    if n_words < 5:
        label = "short"
    elif n_words < 20:
        label = "medium"
    else:
        label = "long"
    return {"text_length": label}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
```python
2+
def word_count_classifier(text: str) -> str:
    """
    @param text: text to check the length of.
    @return: either 'short', 'medium' or 'long' depending on the counted words.
    """
    n_words = len(text.split())
    if n_words < 5:
        return "short"
    return "medium" if n_words < 20 else "long"

# ↑ necessary bricks function
# -----------------------------------------------------------------------------------------
# ↓ example implementation

def example_integration():
    texts = ["This is short.", "This is a text with medium length.", "This is a longer text with many more words. There is even a second sentence with extra words. Splendid, what a joyful day!"]
    for text in texts:
        print(f"\"{text}\" is -> {word_count_classifier(text)}")

example_integration()
26+
```

0 commit comments

Comments
 (0)