diff --git a/Makefile b/Makefile index 7da49bc..9ec0344 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ install: .PHONY: run map map: - python examples/map.py + python examples/tasks/map/map.py run: map .PHONY: benchmark_match, benchmark_mapper, test diff --git a/examples/kafka/s2_producer.py b/examples/kafka/s2_producer.py index db10b62..4276296 100644 --- a/examples/kafka/s2_producer.py +++ b/examples/kafka/s2_producer.py @@ -23,6 +23,7 @@ # User data to be sent user_data = {"name": "John Doe", "age": 28} +print("User:", user_data) # Produce message producer.produce(topic='user-info', key=str(user_data['name']), value=user_data) diff --git a/examples/kafka/s4_producer_v2.py b/examples/kafka/s4_producer_v2.py index c97a355..3a6d6b8 100644 --- a/examples/kafka/s4_producer_v2.py +++ b/examples/kafka/s4_producer_v2.py @@ -18,6 +18,7 @@ # Updated user data to be sent with the new schema user_data = {"name": "Jane Doe", "age": 27, "email": "janedoe@example.com"} +print("User:", user_data) # Produce message producer.produce(topic='user-info', key=str(user_data['name']), value=user_data) diff --git a/examples/tasks/assemble/assemble.py b/examples/tasks/assemble/assemble.py index 2b5256e..edbd156 100644 --- a/examples/tasks/assemble/assemble.py +++ b/examples/tasks/assemble/assemble.py @@ -1,8 +1,43 @@ +import pandas as pd + import llmint +from llmint.assemble.pandas import assemble, construct def main(): - llmint.assemble() + source_schema = ''' + { + "fields": [ + {"name": "Fname", "type": "string"}, + {"name": "Lname", "type": "string"}, + {"name": "Age", "type": "int"}, + {"name": "Email", "type": ["null", "string"], "default": null} + ] + } + ''' + target_schema = ''' + { + "fields": [ + {"name": "name", "type": "string"}, + {"name": "age", "type": "int"}, + {"name": "email", "type": ["null", "string"], "default": null} + ] + } + ''' + + + source_df = pd.DataFrame([{"Fname": "Josh", "Lname": "Doe", "Age": 31, "Email": "joshdoe@example.com"}]) + dest_df = 
pd.DataFrame([{"name": "Jane Doe", "age": 27, "email": "janedoe@example.com"}]) + print("Concat the source dataframe to the dest dataframe:") + print("Source:", source_df, sep="\n") + print("Dest:", dest_df, sep="\n") + + mappings = llmint.map(source_schema, target_schema) + assembly = assemble(mappings) + output = construct(source_df, assembly) + + combined_df = pd.concat([dest_df, output], axis=0) + print("\nCombined:", combined_df, sep="\n") if __name__ == "__main__": diff --git a/llmint/assemble/pandas/__init__.py b/llmint/assemble/pandas/__init__.py new file mode 100644 index 0000000..9cfa181 --- /dev/null +++ b/llmint/assemble/pandas/__init__.py @@ -0,0 +1 @@ +from llmint.assemble.pandas.function import assemble, construct diff --git a/llmint/assemble/pandas/function.py b/llmint/assemble/pandas/function.py new file mode 100644 index 0000000..b9f2be2 --- /dev/null +++ b/llmint/assemble/pandas/function.py @@ -0,0 +1,39 @@ +import pandas as pd +from typing import List, Callable + +from llmint.assemble.pandas.transform import ( + add, copy, default, missing, apply, scale, shift +) +from llmint.map.function import Map + + +def assemble(mappings: list[Map]): + output = [] + + for mapping in mappings: + match mapping.transformation.split(' ')[0]: + case 'ADD': + output.append(add(mapping)) + case 'COPY': + output.append(copy(mapping)) + case 'DEFAULT': + output.append(default(mapping)) + case 'MISSING': + output.append(missing(mapping)) + case 'APPLY': + output.append(apply(mapping)) + case 'SCALE': + output.append(scale(mapping)) + case 'SHIFT': + output.append(shift(mapping)) + + return output + + +def construct(df: pd.DataFrame, assembly: List[Callable[[pd.DataFrame], pd.Series]]): + df_output = [] + + for func in assembly: + df_output.append(func(df)) + + return pd.concat(df_output, axis=1) diff --git a/llmint/assemble/pandas/transform/__init__.py b/llmint/assemble/pandas/transform/__init__.py new file mode 100644 index 0000000..0c10cf9 --- /dev/null +++ 
b/llmint/assemble/pandas/transform/__init__.py @@ -0,0 +1,8 @@ +from llmint.assemble.pandas.transform.field.add import func as add +from llmint.assemble.pandas.transform.field.copy import func as copy +from llmint.assemble.pandas.transform.field.default import func as default +from llmint.assemble.pandas.transform.field.missing import func as missing + +from llmint.assemble.pandas.transform.value.apply import func as apply +from llmint.assemble.pandas.transform.value.scale import func as scale +from llmint.assemble.pandas.transform.value.shift import func as shift diff --git a/llmint/assemble/pandas/transform/field/__init__.py b/llmint/assemble/pandas/transform/field/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/llmint/assemble/pandas/transform/field/add.py b/llmint/assemble/pandas/transform/field/add.py new file mode 100644 index 0000000..398f0a9 --- /dev/null +++ b/llmint/assemble/pandas/transform/field/add.py @@ -0,0 +1,10 @@ +import re +from pandas import Series + +from llmint.map.function import Map + + +def func(mapping: Map): + col_type = re.search(r'TYPE (\w+)', mapping.transformation).group(1) + + return lambda df: Series([], name=mapping.target_field, dtype=col_type) diff --git a/llmint/assemble/pandas/transform/field/copy.py b/llmint/assemble/pandas/transform/field/copy.py new file mode 100644 index 0000000..b47042c --- /dev/null +++ b/llmint/assemble/pandas/transform/field/copy.py @@ -0,0 +1,7 @@ +from pandas import Series + +from llmint.map.function import Map + + +def func(mapping: Map): + return lambda df: Series(df[mapping.source_field], name=mapping.target_field) diff --git a/llmint/assemble/pandas/transform/field/default.py b/llmint/assemble/pandas/transform/field/default.py new file mode 100644 index 0000000..cdc540a --- /dev/null +++ b/llmint/assemble/pandas/transform/field/default.py @@ -0,0 +1,10 @@ +import re +from pandas import Series + +from llmint.map.function import Map + + +def func(mapping: Map): + default_val = 
re.search(r'DEFAULT TO (.*)', mapping.transformation).group(1)
+
+    return lambda df: Series([default_val] * len(df), name=mapping.target_field)
diff --git a/llmint/assemble/pandas/transform/field/missing.py b/llmint/assemble/pandas/transform/field/missing.py
new file mode 100644
index 0000000..9c25dfb
--- /dev/null
+++ b/llmint/assemble/pandas/transform/field/missing.py
@@ -0,0 +1,5 @@
+from llmint.map.function import Map
+
+
+def func(mapping: Map):
+    return lambda df: print(f"WARNING: {mapping.target_field} field cannot be automatically converted.")
diff --git a/llmint/assemble/pandas/transform/value/__init__.py b/llmint/assemble/pandas/transform/value/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/llmint/assemble/pandas/transform/value/apply.py b/llmint/assemble/pandas/transform/value/apply.py
new file mode 100644
index 0000000..fb370e1
--- /dev/null
+++ b/llmint/assemble/pandas/transform/value/apply.py
@@ -0,0 +1,29 @@
+import re
+from pandas import Series, DataFrame
+
+from llmint.map.function import Map
+
+
+def func(mapping: Map):
+    """Build a callable that evaluates the mapping's APPLY expression.
+
+    The expression is the text after ``APPLY `` in ``mapping.transformation``.
+    It may refer to the dataframe's columns by name, with spaces replaced by
+    underscores. The result is returned as a Series named
+    ``mapping.target_field``.
+    """
+    apply_func = re.search(r'APPLY (.*)', mapping.transformation).group(1)
+
+    def apply(df: DataFrame):
+        # Bind each column to its own name in a private namespace. Exec-ing
+        # the assignments into this module's globals would let a column called
+        # "Series" or "re" overwrite the imports above.
+        namespace = {col.replace(" ", "_"): df[col] for col in df.columns}
+
+        # SECURITY: the expression is evaluated as Python code; only use this
+        # with mappings from a trusted source.
+        output = eval(apply_func, namespace)
+
+        return Series(output, name=mapping.target_field)
+
+    return apply
diff --git a/llmint/assemble/pandas/transform/value/scale.py b/llmint/assemble/pandas/transform/value/scale.py
new file mode 100644
index 0000000..845d5f9
--- /dev/null
+++ b/llmint/assemble/pandas/transform/value/scale.py
@@ -0,0 +1,24 @@
+import re
+
+from llmint.map.function import Map
+
+
+def func(mapping: Map):
+    """Build a callable that multiplies the source column by the mapping's factor.
+
+    The factor is the text after ``SCALE BY `` in ``mapping.transformation``.
+    When it is missing or not a valid number, the source column is passed
+    through unscaled. The result is named ``mapping.target_field``.
+    """
+    found = re.search(r'SCALE BY (.+)', mapping.transformation)
+    try:
+        # float() rather than a hand-written numeric pattern: r'(\d*.\d*)'
+        # (unescaped dot, no sign) cuts "-1.5" down to "-1".
+        scale = float(found.group(1)) if found else None
+    except ValueError:
+        scale = None
+
+    if scale is None:
+        return lambda df: df[mapping.source_field].rename(mapping.target_field)
+
+    return lambda df: (df[mapping.source_field] * scale).rename(mapping.target_field)
diff --git
a/llmint/assemble/pandas/transform/value/shift.py b/llmint/assemble/pandas/transform/value/shift.py
new file mode 100644
index 0000000..d5310c1
--- /dev/null
+++ b/llmint/assemble/pandas/transform/value/shift.py
@@ -0,0 +1,24 @@
+import re
+
+from llmint.map.function import Map
+
+
+def func(mapping: Map):
+    """Build a callable that adds the mapping's offset to the source column.
+
+    The offset is the text after ``SHIFT BY `` in ``mapping.transformation``.
+    When it is missing or not a valid number, the source column is passed
+    through unshifted. The result is named ``mapping.target_field``.
+    """
+    found = re.search(r'SHIFT BY (.+)', mapping.transformation)
+    try:
+        # float() rather than a hand-written numeric pattern: r'(\d*.\d*)'
+        # (unescaped dot, no sign) cuts "-273.15" down to "-273".
+        shift = float(found.group(1)) if found else None
+    except ValueError:
+        shift = None
+
+    if shift is None:
+        return lambda df: df[mapping.source_field].rename(mapping.target_field)
+
+    return lambda df: (df[mapping.source_field] + shift).rename(mapping.target_field)
diff --git a/llmint/core/eval.py b/llmint/core/eval.py
index 835a966..5a1ba4d 100644
--- a/llmint/core/eval.py
+++ b/llmint/core/eval.py
@@ -1,3 +1,6 @@
+from llmint.map.function import Map
+
+
 class pcolors:
     RIGHT = '\033[92m'
     WRONG = '\033[91m'
@@ -43,13 +46,21 @@ def accuracy(output: list, test_example: list):
         f1 = 0
     return precision, recall, f1
 
-def print_mappings(mappings: dict, include_reasoning=True):
-    for name, response in mappings.items():
-        mapping, reasoning = response
-        if include_reasoning:
-
-            print(pcolors.RIGHT + mapping + pcolors.ENDC + '\n',
-                  reasoning, flush=True)
-        else:
-            print(pcolors.RIGHT + mapping + pcolors.ENDC,
-                  flush=True)
+def print_mappings(mappings: list[Map], include_reasoning=True):
+    """Print each mapping in color, optionally followed by its reasoning.
+
+    Args:
+        mappings: Map objects to print.
+        include_reasoning: when true, print ``mapping.reasoning`` after each mapping.
+    """
+    for mapping in mappings:
+        # Format the fields explicitly: concatenating mapping.__dict__ to the
+        # color codes raises TypeError (str + dict).
+        summary = (f'{{from: {mapping.source_field}, to: {mapping.target_field}, '
+                   f'transformation: {mapping.transformation}}}')
+        if include_reasoning:
+            print(pcolors.RIGHT + summary + pcolors.ENDC + '\n',
+                  mapping.reasoning, flush=True)
+        else:
+            print(pcolors.RIGHT + summary + pcolors.ENDC,
+                  flush=True)
diff --git a/llmint/map/function.py b/llmint/map/function.py
index 77456d5..1b0d686 100644
--- a/llmint/map/function.py
+++ b/llmint/map/function.py
@@ -1,8 +1,18 @@
+from pydantic import BaseModel
+
 from llmint.core import model
 from llmint.map import prompt, parameter
 
+
+class Map(BaseModel):
+    source_field: str | None
+    target_field: str | None  # None for DELETE mappings, which have no target
+    transformation: str
+    reasoning: str | None
+
+
 def map(source_schema, target_schema):
-    mappings = model.call(
+    output = model.call(
         prompt=[
             {"role": "system", "content":
prompt.system}, {"role": "user", "content": prompt.user.format( @@ -15,5 +25,12 @@ def map(source_schema, target_schema): temperature=parameter.temperature, seed=parameter.seed, max_model_call=1, # only one model call - )["tool_outputs"][0] # take the first tool output + )["tool_outputs"] + + # process the mappings + mappings = [] + for mapping in output: + for _, mapping in mapping.items(): + mappings.append(mapping) + return mappings diff --git a/llmint/map/parameter.py b/llmint/map/parameter.py index fa3b1ec..e8bbd7f 100644 --- a/llmint/map/parameter.py +++ b/llmint/map/parameter.py @@ -7,16 +7,18 @@ "llmint.map.match", # field transformation "llmint.map.transform.field.add", - "llmint.map.transform.field.cast", + # "llmint.map.transform.field.cast", "llmint.map.transform.field.copy", "llmint.map.transform.field.default", - "llmint.map.transform.field.delete", - "llmint.map.transform.field.rename", + # "llmint.map.transform.field.delete", + # "llmint.map.transform.field.rename", "llmint.map.transform.field.missing", # value transformation "llmint.map.transform.value.apply", # "llmint.map.transform.value.gen", - "llmint.map.transform.value.link", + # "llmint.map.transform.value.link", "llmint.map.transform.value.scale", "llmint.map.transform.value.shift", ] + +reasoning = False diff --git a/llmint/map/prompt.py b/llmint/map/prompt.py index 81c70d6..b429e7c 100644 --- a/llmint/map/prompt.py +++ b/llmint/map/prompt.py @@ -69,3 +69,6 @@ user = "Source Schema: ' + {source_schema} + " \ "'\nTarget Schema: ' + {target_schema}" + +"""Reasoning prompt""" +reasoning_prompt = "In-depth reasoning as to why you chose this function" diff --git a/llmint/map/transform/field/add.py b/llmint/map/transform/field/add.py index 6eeb60a..f390ac2 100644 --- a/llmint/map/transform/field/add.py +++ b/llmint/map/transform/field/add.py @@ -1,31 +1,37 @@ +from libem.core.util import create_json_schema + +from llmint.map.function import Map +from llmint.map.parameter import reasoning +from 
llmint.map.prompt import reasoning_prompt + + name = "ADD" +description = "Add an optional target field" +properties = { + "target_field": (str, "Optional field in the target schema"), + "field_type": (str, "The type of the field to be added"), +} +if reasoning: + properties["reasoning"] = (str, reasoning_prompt) + schema = { "type": "function", "function": { "name": name, - "description": "Add an optional target field", + "description": description, "parameters": { "type": "object", - "properties": { - "target_field": { - "type": "string", - "description": "Optional field in the target schema", - }, - "field_type": { - "type": "string", - "description": "The type of the field to be added", - }, - "reasoning": { - "type": "string", - "description": "In-depth reasoning as to why you chose this function", - }, - }, - "required": ["target_field", "field_type", "reasoning"], - }, + "properties": create_json_schema( + **properties + )["properties"], + "required": list(properties.keys()), + } } } -def func(target_field, field_type, reasoning): - return (f'{{from: None, to: {target_field}, ' - f'transformation: ADD {target_field} TYPE {field_type}}}', reasoning) +def func(target_field, field_type, reasoning=None): + return Map(source_field=None, + target_field=target_field, + transformation=f'ADD {target_field} TYPE {field_type}', + reasoning=reasoning) diff --git a/llmint/map/transform/field/cast.py b/llmint/map/transform/field/cast.py index 2ecbf03..4512815 100644 --- a/llmint/map/transform/field/cast.py +++ b/llmint/map/transform/field/cast.py @@ -1,3 +1,6 @@ +from llmint.map.function import Map + + name = "CAST" schema = { "type": "function", @@ -35,5 +38,7 @@ def func(source_field, target_field, source_type, target_type, reasoning): - return (f'{{from: {source_field}, to: {target_field}, ' - f'transformation: CAST {source_field} FROM {source_type} TO {target_type}}}', reasoning) + return Map(source_field=source_field, + target_field=target_field, + 
transformation=f'CAST FROM {source_type} TO {target_type}', + reasoning=reasoning) diff --git a/llmint/map/transform/field/copy.py b/llmint/map/transform/field/copy.py index 0f84d8a..1a7cef9 100644 --- a/llmint/map/transform/field/copy.py +++ b/llmint/map/transform/field/copy.py @@ -1,32 +1,38 @@ +from libem.core.util import create_json_schema + +from llmint.map.function import Map +from llmint.map.parameter import reasoning +from llmint.map.prompt import reasoning_prompt + + name = "COPY" +description = "Directly copies data from the source field to the target field without any transformation." +properties = { + "source_field": (str, "Field in the source schema"), + "target_field": (str, "Field in the target schema"), +} +if reasoning: + properties["reasoning"] = (str, reasoning_prompt) + schema = { "type": "function", "function": { "name": name, - "description": "Directly copies data from the source field " - "to the target field without any transformation..", + "description": description, "parameters": { "type": "object", - "properties": { - "source_field": { - "type": "string", - "description": "Field in the source schema", - }, - "target_field": { - "type": "string", - "description": "Field in the target schema", - }, - "reasoning": { - "type": "string", - "description": "In-depth reasoning as to why you chose this function", - }, - }, - "required": ["target_field", "field_type", "reasoning"], - }, + "properties": create_json_schema( + **properties + )["properties"], + "required": list(properties.keys()), + } } } -def func(source_field, target_field, reasoning): - return (f'{{from: {source_field}, to: {target_field}, ' - f'transformation: COPY}}', reasoning) + +def func(source_field, target_field, reasoning=None): + return Map(source_field=source_field, + target_field=target_field, + transformation=f'COPY', + reasoning=reasoning) diff --git a/llmint/map/transform/field/default.py b/llmint/map/transform/field/default.py index 8d32726..79c521f 100644 --- 
a/llmint/map/transform/field/default.py +++ b/llmint/map/transform/field/default.py @@ -1,36 +1,38 @@ +from libem.core.util import create_json_schema + +from llmint.map.function import Map +from llmint.map.parameter import reasoning +from llmint.map.prompt import reasoning_prompt + + name = "DEFAULT" +description = "Set the default of a target field" +properties = { + "source_field": (str, "Field from the source schema"), + "target_field": (str, "Field from the target schema"), + "default_value": (str, "Default value of the target field"), +} +if reasoning: + properties["reasoning"] = (str, reasoning_prompt) + schema = { "type": "function", "function": { "name": name, - "description": "Set the default of a target field", + "description": description, "parameters": { "type": "object", - "properties": { - "source_field": { - "type": "string", - "description": "Field from the source schema", - }, - "target_field": { - "type": "string", - "description": "Field from the target schema", - }, - "default_value": { - "type": "string", - "description": "Default value of the target field", - }, - "reasoning": { - "type": "string", - "description": "In-depth reasoning as to why you chose this function", - }, - }, - "required": ["source_field", "target_field", "default_value", "reasoning"], - }, + "properties": create_json_schema( + **properties + )["properties"], + "required": list(properties.keys()), + } } } -def func(source_field, target_field, default_value, reasoning): - return ( - f'{{from: {source_field}, to: {target_field}, ' - f'transformation: DEFAULT {target_field} TO {default_value}}}', reasoning) +def func(source_field, target_field, default_value, reasoning=None): + return Map(source_field=source_field, + target_field=target_field, + transformation=f'DEFAULT TO {default_value}', + reasoning=reasoning) diff --git a/llmint/map/transform/field/delete.py b/llmint/map/transform/field/delete.py index 15c3eef..3b89849 100644 --- a/llmint/map/transform/field/delete.py +++ 
b/llmint/map/transform/field/delete.py @@ -1,27 +1,36 @@ +from libem.core.util import create_json_schema + +from llmint.map.function import Map +from llmint.map.parameter import reasoning +from llmint.map.prompt import reasoning_prompt + + name = "DELETE" +description = "Delete a source field" +properties = { + "source_field": (str, "Field from the source schema"), +} +if reasoning: + properties["reasoning"] = (str, reasoning_prompt) + schema = { "type": "function", "function": { "name": name, - "description": "Delete a source field", + "description": description, "parameters": { "type": "object", - "properties": { - "source_field": { - "type": "string", - "description": "Field from the source schema", - }, - "reasoning": { - "type": "string", - "description": "In-depth reasoning as to why you chose this function", - }, - }, - "required": ["source_field", "reasoning"], - }, + "properties": create_json_schema( + **properties + )["properties"], + "required": list(properties.keys()), + } } } -def func(source_field, reasoning): - return (f'{{from: {source_field}, to: None, ' - f'transformation: DELETE {source_field}}}', reasoning) +def func(source_field, reasoning=None): + return Map(source_field=source_field, + target_field=None, + transformation=f'DELETE', + reasoning=reasoning) diff --git a/llmint/map/transform/field/missing.py b/llmint/map/transform/field/missing.py index e0d30bc..ce5e11d 100644 --- a/llmint/map/transform/field/missing.py +++ b/llmint/map/transform/field/missing.py @@ -1,27 +1,36 @@ +from libem.core.util import create_json_schema + +from llmint.map.function import Map +from llmint.map.parameter import reasoning +from llmint.map.prompt import reasoning_prompt + + name = "MISSING" +description = "Indicates that the required target field is impossible to construct from the fields in the source schema" +properties = { + "target_field": (str, "Field from the target schema"), +} +if reasoning: + properties["reasoning"] = (str, reasoning_prompt) + schema 
= { "type": "function", "function": { "name": name, - "description": "Indicates that the required target field is impossible to construct from the fields in the source schema", + "description": description, "parameters": { "type": "object", - "properties": { - "target_field": { - "type": "string", - "description": "Field from the target schema", - }, - "reasoning": { - "type": "string", - "description": "In-depth reasoning as to why you chose this function", - }, - }, - "required": ["target_field", "reasoning"], - }, + "properties": create_json_schema( + **properties + )["properties"], + "required": list(properties.keys()), + } } } -def func(target_field, reasoning): - return (f'{{from: None, to: {target_field}, ' - f'transformation: MISSING {target_field}}}', reasoning) +def func(target_field, reasoning=None): + return Map(source_field=None, + target_field=target_field, + transformation=f'MISSING', + reasoning=reasoning) diff --git a/llmint/map/transform/field/rename.py b/llmint/map/transform/field/rename.py index 939d3fc..eccacd1 100644 --- a/llmint/map/transform/field/rename.py +++ b/llmint/map/transform/field/rename.py @@ -1,32 +1,37 @@ +from libem.core.util import create_json_schema + +from llmint.map.function import Map +from llmint.map.parameter import reasoning +from llmint.map.prompt import reasoning_prompt + + name = "RENAME" +description = "Rename a source field" +properties = { + "source_field": (str, "Field from the source schema"), + "target_field": (str, "Field from the target schema"), +} +if reasoning: + properties["reasoning"] = (str, reasoning_prompt) + schema = { "type": "function", "function": { "name": name, - "description": "Rename a source field", + "description": description, "parameters": { "type": "object", - "properties": { - "source_field": { - "type": "string", - "description": "Field from the source schema", - }, - "target_field": { - "type": "string", - "description": "Field from the target schema", - }, - "reasoning": { - "type": 
"string", - "description": "In-depth reasoning as to why you chose this function", - }, - }, - "required": ["source_field", "target_field", "reasoning"], - }, + "properties": create_json_schema( + **properties + )["properties"], + "required": list(properties.keys()), + } } } -def func(source_field, target_field, reasoning): - return ( - f'{{from: {source_field}, to: {target_field}, ' - f'transformation: RENAME {source_field} TO {target_field}}}', reasoning) +def func(source_field, target_field, reasoning=None): + return Map(source_field=source_field, + target_field=target_field, + transformation=f'RENAME TO {target_field}', + reasoning=reasoning) diff --git a/llmint/map/transform/value/apply.py b/llmint/map/transform/value/apply.py index 5589aa4..8b9ccab 100644 --- a/llmint/map/transform/value/apply.py +++ b/llmint/map/transform/value/apply.py @@ -1,35 +1,38 @@ +from libem.core.util import create_json_schema + +from llmint.map.function import Map +from llmint.map.parameter import reasoning +from llmint.map.prompt import reasoning_prompt + + name = "APPLY" +description = "Apply a function to the values of a source field" +properties = { + "target_field": (str, "Field from the target schema"), + "function": (str, "An expression involving source schema field(s) to apply, " + "replace any spaces in the schema fields with underscores"), +} +if reasoning: + properties["reasoning"] = (str, reasoning_prompt) + schema = { "type": "function", "function": { "name": name, - "description": "Apply a function to the values of a source field", + "description": description, "parameters": { "type": "object", - "properties": { - "source_field": { - "type": "string", - "description": "Field from the source schema", - }, - "target_field": { - "type": "string", - "description": "Field from the target schema", - }, - "function_name": { - "type": "string", - "description": "Function to apply", - }, - "reasoning": { - "type": "string", - "description": "In-depth reasoning as to why you 
chose this function", - }, - }, - "required": ["source_field", "target_field", "function_name", "reasoning"], - }, + "properties": create_json_schema( + **properties + )["properties"], + "required": list(properties.keys()), + } } } -def func(source_field, target_field, function_name, reasoning): - return (f'{{from: {source_field}, to: {target_field}, ' - f'transformation: APPLY {source_field} {function_name}}}', reasoning) +def func(target_field, function, reasoning=None): + return Map(source_field=None, + target_field=target_field, + transformation=f'APPLY {function}', + reasoning=reasoning) diff --git a/llmint/map/transform/value/gen.py b/llmint/map/transform/value/gen.py index fcfac22..5e3456a 100644 --- a/llmint/map/transform/value/gen.py +++ b/llmint/map/transform/value/gen.py @@ -1,31 +1,31 @@ +from libem.core.util import create_json_schema + +from llmint.map.function import Map +from llmint.map.parameter import reasoning +from llmint.map.prompt import reasoning_prompt + name = "GEN" +description = "Describes the equation needed to convert from source to target values" +properties = { + "source_field": (str, "Field from the target schema"), + "target_field": (str, "Field from the target schema"), + "conversion_equation": (str, "Mathematical equation used in conversion. " + "Let x be the source value and y be the target value."), +} +if reasoning: + properties["reasoning"] = (str, reasoning_prompt) + schema = { "type": "function", "function": { "name": name, - "description": "Describes the equation needed to convert from source to target values", + "description": description, "parameters": { "type": "object", - "properties": { - "source_field": { - "type": "string", - "description": "Field from the target schema", - }, - "target_field": { - "type": "string", - "description": "Field from the target schema", - }, - "conversion_equation": { - "type": "string", - "description": "Mathematical equation used in conversion. 
" - "Let x be the source value and y be the target value.", - }, - "reasoning": { - "type": "string", - "description": "In-depth reasoning as to why you chose this function", - }, - }, - "required": ["source_field", "target_field", "conversion_equation", "reasoning"], - }, + "properties": create_json_schema( + **properties + )["properties"], + "required": list(properties.keys()), + } } } diff --git a/llmint/map/transform/value/link.py b/llmint/map/transform/value/link.py index 33c19b8..9179351 100644 --- a/llmint/map/transform/value/link.py +++ b/llmint/map/transform/value/link.py @@ -1,40 +1,38 @@ +from libem.core.util import create_json_schema + +from llmint.map.function import Map +from llmint.map.parameter import reasoning +from llmint.map.prompt import reasoning_prompt + + name = "LINK" +description = "Create a mapping between a value in the source field to a value in the target field, usually for enum type values" +properties = { + "source_field": (str, "Field from the source schema"), + "target_field": (str, "Field from the target schema"), + "old_value": (str, "Source field value"), + "new_value": (str, "Target field value"), +} +if reasoning: + properties["reasoning"] = (str, reasoning_prompt) + schema = { "type": "function", "function": { "name": name, - "description": "Create a mapping between a value in the source field to a value in the target field, usually for enum type values", + "description": description, "parameters": { "type": "object", - "properties": { - "source_field": { - "type": "string", - "description": "Field from the source schema", - }, - "target_field": { - "type": "string", - "description": "Field from the target schema", - }, - "old_value": { - "type": "string", - "description": "Source field value", - }, - "new_value": { - "type": "string", - "description": "Target field value", - }, - "reasoning": { - "type": "string", - "description": "In-depth reasoning as to why you chose this function", - }, - }, - "required": ["source_field", 
"target_field", "old_value", "new_value", "reasoning"], - }, + "properties": create_json_schema( + **properties + )["properties"], + "required": list(properties.keys()), + } } } -def func(source_field, target_field, old_value, new_value, reasoning): - return ( - f'{{from: {source_field}, to: {target_field}, ' - f'transformation: LINK {source_field} "{old_value}" TO "{new_value}"}}', - reasoning) \ No newline at end of file +def func(source_field, target_field, old_value, new_value, reasoning=None): + return Map(source_field=source_field, + target_field=target_field, + transformation=f'LINK {source_field} "{old_value}" TO "{new_value}"', + reasoning=reasoning) \ No newline at end of file diff --git a/llmint/map/transform/value/scale.py b/llmint/map/transform/value/scale.py index 97b57ce..1a2f767 100644 --- a/llmint/map/transform/value/scale.py +++ b/llmint/map/transform/value/scale.py @@ -1,36 +1,38 @@ +from libem.core.util import create_json_schema + +from llmint.map.function import Map +from llmint.map.parameter import reasoning +from llmint.map.prompt import reasoning_prompt + + name = "SCALE" +description = "Scale the value of a source field" +properties = { + "source_field": (str, "Field from the source schema"), + "target_field": (str, "Field from the target schema"), + "factor": (str, "Factor to multiply the source field by"), +} +if reasoning: + properties["reasoning"] = (str, reasoning_prompt) + schema = { "type": "function", "function": { "name": name, - "description": "Scale the value of a source field", + "description": description, "parameters": { "type": "object", - "properties": { - "source_field": { - "type": "string", - "description": "Field from the source schema", - }, - "target_field": { - "type": "string", - "description": "Field from the target schema", - }, - "factor": { - "type": "string", - "description": "Factor to multiply the source field by", - }, - "reasoning": { - "type": "string", - "description": "In-depth reasoning as to why you 
chose this function", - }, - }, - "required": ["source_field", "target_field", "factor", "reasoning"], - }, + "properties": create_json_schema( + **properties + )["properties"], + "required": list(properties.keys()), + } } } -def func(source_field, target_field, factor, reasoning): - return ( - f'{{from: {source_field}, to: {target_field}, ' - f'transformation: SCALE {source_field} BY {factor}}}', reasoning) +def func(source_field, target_field, factor, reasoning=None): + return Map(source_field=source_field, + target_field=target_field, + transformation=f'SCALE BY {factor}', + reasoning=reasoning) diff --git a/llmint/map/transform/value/shift.py b/llmint/map/transform/value/shift.py index 9e8da38..04d9445 100644 --- a/llmint/map/transform/value/shift.py +++ b/llmint/map/transform/value/shift.py @@ -1,35 +1,38 @@ +from libem.core.util import create_json_schema + +from llmint.map.function import Map +from llmint.map.parameter import reasoning +from llmint.map.prompt import reasoning_prompt + + name = "SHIFT" +description = "Shift the value of a source field" +properties = { + "source_field": (str, "Field from the source schema"), + "target_field": (str, "Field from the target schema"), + "value": (str, "Value to shift the source field by"), +} +if reasoning: + properties["reasoning"] = (str, reasoning_prompt) + schema = { "type": "function", "function": { "name": name, - "description": "Shift the value of a source field", + "description": description, "parameters": { "type": "object", - "properties": { - "source_field": { - "type": "string", - "description": "Field from the source schema", - }, - "target_field": { - "type": "string", - "description": "Field from the target schema", - }, - "value": { - "type": "string", - "description": "Value to shift the source field by", - }, - "reasoning": { - "type": "string", - "description": "In-depth reasoning as to why you chose this function", - }, - }, - "required": ["source_field", "target_field", "value", "reasoning"], - 
}, + "properties": create_json_schema( + **properties + )["properties"], + "required": list(properties.keys()), + } } } -def func(source_field, target_field, value, reasoning): - return (f'{{from: {source_field}, to: {target_field}, ' - f'transformation: SHIFT {source_field} BY {value}}}', reasoning) +def func(source_field, target_field, value, reasoning=None): + return Map(source_field=source_field, + target_field=target_field, + transformation=f'SHIFT BY {value}', + reasoning=reasoning)