Merge remote-tracking branch 'origin/main' into word-count-classifier

LeonardPuettmannKern · LeonardPuettmannKern · commit 074197192ab5 · 2023-10-17T14:12:05.000+02:00
diff --git a/classifiers/__init__.py b/classifiers/__init__.py
@@ -9,6 +9,10 @@
 
 from .lookup_lists import lookup_list
 
+from .question_type import (
+    question_type_classifier
+)
+
 from .reference_quality import (
     word_count_classifier,
     special_character_classifier,
@@ -65,6 +69,8 @@
     word_count_classifier,
     chunked_sentence_complexity,
     special_character_classifier,
+    chunked_sentence_complexity,
+    question_type_classifier
 ]:
     module_name = module.__name__.split(".")[-1]
     model_name = (
diff --git a/classifiers/communication_style/communication_style_classifier/README.md b/classifiers/communication_style/communication_style_classifier/README.md
@@ -0,0 +1 @@
+Uses an `intfloat/multilingual-e5-small` model, which was fine-tuned on English and German examples of different communication styles. The model is hosted on Kern AI's own infrastructure and is meant to be used to classify text sequences by the labels `action-seeking`, `fact-oriented`, `information-seeking` or `self-revealing`.
diff --git a/classifiers/communication_style/communication_style_classifier/__init__.py b/classifiers/communication_style/communication_style_classifier/__init__.py
@@ -0,0 +1,27 @@
+from pydantic import BaseModel
+import requests
+
+INPUT_EXAMPLE = {
+    "text": "Change the number in row 2 and 3.",
+    "model_name": "KernAI/multilingual-e5-communication-style",
+}
+
+
+class CommunicationStyleClassifierModel(BaseModel):
+    text: str
+    model_name: str
+
+    class Config:
+        schema_extra = {"example": INPUT_EXAMPLE}
+
+
+def communication_style_classifier(req: CommunicationStyleClassifierModel):
+    """Uses custom E5 model to classify communication style of a text"""
+    payload = {
+        "model_name": req.model_name,
+        "text": req.text
+    }      
+    response = requests.post("https://free.api.kern.ai/inference", json=payload)
+    if response.ok:
+        return {"communication_style": response.json()["label"]}
+    return response.raise_for_status()
diff --git a/classifiers/communication_style/communication_style_classifier/code_snippet_common.md b/classifiers/communication_style/communication_style_classifier/code_snippet_common.md
@@ -0,0 +1,34 @@
+```python
+import requests
+
+def communication_style_classifier(text: str, model_name: str, request_url: str = "https://free.api.kern.ai/inference") -> str:
+    """
+    @param text: text with a user query you want to classify
+    @param model_name: Name of a model provided by Kern AI
+    @param request_url: URL to the API endpoint of Kern AI
+    @return: returns either 'action-seeking', 'fact-oriented', 'information-seeking' or 'self-revealing'.
+    """
+    payload = {
+        "model_name": model_name,
+        "text": text
+    }      
+    response = requests.post(request_url, json=payload)
+    if response.ok:
+        return response.json()["label"]
+    return response.raise_for_status()
+
+
+# ↑ necessary bricks function 
+# -----------------------------------------------------------------------------------------
+# ↓ example implementation 
+
+
+model_name = "KernAI/multilingual-e5-communication-style"
+
+def example_integration():
+    texts = ["Change the number in row 2 and 3.", "Can you show me some data from the references?", "I am super happy today."]
+    for text in texts:
+        print(f"the communication style of \"{text}\" is \"{communication_style_classifier(text, model_name=model_name)}\"")
+
+example_integration()
+```
diff --git a/classifiers/communication_style/communication_style_classifier/code_snippet_refinery.md b/classifiers/communication_style/communication_style_classifier/code_snippet_refinery.md
@@ -0,0 +1,16 @@
+```python
+import requests
+
+ATTRIBUTE: str = "text" # only text attributes
+MODEL_NAME: str = "KernAI/multilingual-e5-communication-style"
+REQUEST_URL: str = "https://free.api.kern.ai/inference"
+
+def communication_style_classifier(record):
+    payload = {
+        "model_name": MODEL_NAME,
+        "text": record[ATTRIBUTE].text
+    }      
+    response = requests.post(REQUEST_URL, json=payload)
+    if response.ok:
+        return response.json()["label"]
+```
diff --git a/classifiers/communication_style/communication_style_classifier/config.py b/classifiers/communication_style/communication_style_classifier/config.py
@@ -0,0 +1,48 @@
+from util.configs import build_classifier_function_config
+from util.enums import State, RefineryDataType, BricksVariableType, SelectionType
+from . import communication_style_classifier, INPUT_EXAMPLE
+
+
+def get_config():
+    return build_classifier_function_config(
+        function=communication_style_classifier,
+        input_example=INPUT_EXAMPLE,
+        issue_id=343,
+        tabler_icon="CircleDotted",
+        min_refinery_version="1.7.0",
+        state=State.PUBLIC.value,
+        type="python_function",
+        available_for=["refinery", "common"],
+        part_of_group=[
+            "communication_style"
+        ],  # first entry should be parent directory
+        # bricks integrator information
+        integrator_inputs={
+            "name": "communication_style_classifier",
+            "refineryDataType": RefineryDataType.TEXT.value,
+            "outputs": ["action-seeking", "fact-oriented", "information-seeking", "self-revealing"],
+            "variables": {
+                "ATTRIBUTE": {
+                    "selectionType": SelectionType.CHOICE.value,
+                    "addInfo": [
+                        BricksVariableType.ATTRIBUTE.value,
+                        BricksVariableType.GENERIC_STRING.value
+                    ]
+                },
+                "MODEL_NAME": {
+                    "selectionType": SelectionType.STRING.value,
+                    "defaultValue": "KernAI/multilingual-e5-communication-style",
+                    "addInfo": [
+                        BricksVariableType.GENERIC_STRING.value
+                    ]
+                },
+                "REQUEST_URL": {
+                    "selectionType": SelectionType.STRING.value,
+                    "defaultValue": "https://free.api.kern.ai/inference",
+                    "addInfo": [
+                        BricksVariableType.GENERIC_STRING.value
+                    ]
+                }
+            }
+        }
+    )
diff --git a/classifiers/question_type/question_type_classifier/README.md b/classifiers/question_type/question_type_classifier/README.md
@@ -0,0 +1 @@
+Uses an `intfloat/multilingual-e5-small` model, which was fine-tuned on English and German examples of different question types. The model is hosted on Kern AI's own infrastructure and is meant to be used to classify text sequences by the labels `keyword-question`, `statement-question` or `interrogative-question`.
diff --git a/classifiers/question_type/question_type_classifier/__init__.py b/classifiers/question_type/question_type_classifier/__init__.py
@@ -0,0 +1,27 @@
+from pydantic import BaseModel
+import requests
+
+INPUT_EXAMPLE = {
+    "text": "Sushi restaurants Barcelona",
+    "model_name": "KernAI/multilingual-e5-question-type",
+}
+
+
+class QuestionTypeClassifierModel(BaseModel):
+    text: str
+    model_name: str
+
+    class Config:
+        schema_extra = {"example": INPUT_EXAMPLE}
+
+
+def question_type_classifier(req: QuestionTypeClassifierModel):
+    """Uses custom E5 model to classify the question type of a text"""
+    payload = {
+        "model_name": req.model_name,
+        "text": req.text
+    }      
+    response = requests.post("https://free.api.kern.ai/inference", json=payload)
+    if response.ok:
+        return {"question_type": response.json()["label"]}
+    return response.raise_for_status()
diff --git a/classifiers/question_type/question_type_classifier/code_snippet_common.md b/classifiers/question_type/question_type_classifier/code_snippet_common.md
@@ -0,0 +1,34 @@
+```python
+import requests
+
+def question_type_classifier(text: str, model_name: str, request_url: str = "https://free.api.kern.ai/inference") -> str:
+    """
+    @param text: text with a user query you want to classify
+    @param model_name: Name of a model provided by Kern AI
+    @param request_url: URL to the API endpoint of Kern AI
+    @return: returns either 'keyword-question', 'interrogative-question' or 'statement-question' 
+    """
+    payload = {
+        "model_name": model_name,
+        "text": text
+    }      
+    response = requests.post(request_url, json=payload)
+    if response.ok:
+        return response.json()["label"]
+    return response.raise_for_status()
+
+
+# ↑ necessary bricks function 
+# -----------------------------------------------------------------------------------------
+# ↓ example implementation 
+
+
+model_name = "KernAI/multilingual-e5-question-type"
+
+def example_integration():
+    texts = ["Travel documents Germany", "Give me documents related to travel insurance.", "What is the content of these documents about?"]
+    for text in texts:
+        print(f"the question type of \"{text}\" is \"{question_type_classifier(text, model_name=model_name)}\"")
+
+example_integration()
+```
diff --git a/classifiers/question_type/question_type_classifier/code_snippet_refinery.md b/classifiers/question_type/question_type_classifier/code_snippet_refinery.md
@@ -0,0 +1,16 @@
+```python
+import requests
+
+ATTRIBUTE: str = "text" # only text attributes
+MODEL_NAME: str = "KernAI/multilingual-e5-question-type"
+REQUEST_URL: str = "https://free.api.kern.ai/inference"
+
+def question_type_classifier(record):
+    payload = {
+        "model_name": MODEL_NAME,
+        "text": record[ATTRIBUTE].text
+    }      
+    response = requests.post(REQUEST_URL, json=payload)
+    if response.ok:
+        return response.json()["label"]
+```
diff --git a/classifiers/question_type/question_type_classifier/config.py b/classifiers/question_type/question_type_classifier/config.py
@@ -0,0 +1,48 @@
+from util.configs import build_classifier_function_config
+from util.enums import State, RefineryDataType, BricksVariableType, SelectionType
+from . import question_type_classifier, INPUT_EXAMPLE
+
+
+def get_config():
+    return build_classifier_function_config(
+        function=question_type_classifier,
+        input_example=INPUT_EXAMPLE,
+        issue_id=344,
+        tabler_icon="ZoomQuestion",
+        min_refinery_version="1.7.0",
+        state=State.PUBLIC.value,
+        type="python_function",
+        available_for=["refinery", "common"],
+        part_of_group=[
+            "question_type"
+        ],  # first entry should be parent directory
+        # bricks integrator information
+        integrator_inputs={
+            "name": "question_type_classifier",
+            "refineryDataType": RefineryDataType.TEXT.value,
+            "outputs": ["keyword-question", "statement-question", "interrogative-question"],
+            "variables": {
+                "ATTRIBUTE": {
+                    "selectionType": SelectionType.CHOICE.value,
+                    "addInfo": [
+                        BricksVariableType.ATTRIBUTE.value,
+                        BricksVariableType.GENERIC_STRING.value
+                    ]
+                },
+                "MODEL_NAME": {
+                    "selectionType": SelectionType.STRING.value,
+                    "defaultValue": "KernAI/multilingual-e5-question-type",
+                    "addInfo": [
+                        BricksVariableType.GENERIC_STRING.value
+                    ]
+                },
+                "REQUEST_URL": {
+                    "selectionType": SelectionType.STRING.value,
+                    "defaultValue": "https://free.api.kern.ai/inference",
+                    "addInfo": [
+                        BricksVariableType.GENERIC_STRING.value
+                    ]
+                }
+            }
+        }
+    )
diff --git a/extractors/personal_identifiers/address_extraction/config.py b/extractors/personal_identifiers/address_extraction/config.py
@@ -11,7 +11,7 @@ def get_config():
         issue_id=62,
         tabler_icon="AddressBook",
         min_refinery_version="1.7.0",
-        state=State.PUBLIC.value,
+        state=State.DRAFT.value,
         type="python_function",
         available_for=["refinery", "common"],
         part_of_group=[
diff --git a/extractors/personal_identifiers/zipcode_extraction/code_snippet_common.md b/extractors/personal_identifiers/zipcode_extraction/code_snippet_common.md
diff --git a/extractors/personal_identifiers/zipcode_extraction/code_snippet_refinery.md b/extractors/personal_identifiers/zipcode_extraction/code_snippet_refinery.md

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	+Uses an `intfloat/multilingual-e5-small` model, which was fine-tuned on English and German examples of different communication styles. The model is hosted on Kern AI's own infrastructure and is meant to be used to classify text sequences by the labels `action-seeking`, `fact-oriented`, `information-seeking` or `self-revealing`.