diff --git a/alembic/versions/0c8eb3ff1c71_adds_playground_question.py b/alembic/versions/0c8eb3ff1c71_adds_playground_question.py
new file mode 100644
index 00000000..64eb7f3f
--- /dev/null
+++ b/alembic/versions/0c8eb3ff1c71_adds_playground_question.py
@@ -0,0 +1,46 @@
+"""adds_playground_question
+
+Revision ID: 0c8eb3ff1c71
+Revises: ac97442726d2
+Create Date: 2025-02-04 09:48:41.971287
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision = "0c8eb3ff1c71"
+down_revision = "ac97442726d2"
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.create_table(
+        "playground_question",
+        sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
+        sa.Column("question", sa.String(), nullable=True),
+        sa.Column("created_at", sa.DateTime(), nullable=True),
+        sa.Column("project_id", postgresql.UUID(as_uuid=True), nullable=True),
+        sa.ForeignKeyConstraint(["project_id"], ["project.id"], ondelete="CASCADE"),
+        sa.PrimaryKeyConstraint("id"),
+    )
+    op.create_index(
+        op.f("ix_playground_question_project_id"),
+        "playground_question",
+        ["project_id"],
+        unique=False,
+    )
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_index(
+        op.f("ix_playground_question_project_id"), table_name="playground_question"
+    )
+    op.drop_table("playground_question")
+    # ### end Alembic commands ###
diff --git a/alembic/versions/ac97442726d2_adds_refinery_playground_evaluation.py b/alembic/versions/ac97442726d2_adds_refinery_playground_evaluation.py
new file mode 100644
index 00000000..fcad86f3
--- /dev/null
+++ b/alembic/versions/ac97442726d2_adds_refinery_playground_evaluation.py
@@ -0,0 +1,83 @@
+"""adds_refinery_playground_evaluation
+
+Revision ID: ac97442726d2
+Revises: eb5ecbee5090
+Create Date: 2025-01-16 13:26:43.059523
+
+"""
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision = 'ac97442726d2'
+down_revision = 'eb5ecbee5090'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.create_table('evaluation_group',
+    sa.Column('id', postgresql.UUID(as_uuid=True), nullable=False),
+    sa.Column('name', sa.String(), nullable=True),
+    sa.Column('created_at', sa.DateTime(), nullable=True),
+    sa.Column('created_by', postgresql.UUID(as_uuid=True), nullable=True),
+    sa.Column('project_id', postgresql.UUID(as_uuid=True), nullable=True),
+    sa.Column('evaluation_set_ids', sa.JSON(), nullable=True),
+    sa.ForeignKeyConstraint(['created_by'], ['user.id'], ondelete='SET NULL'),
+    sa.ForeignKeyConstraint(['project_id'], ['project.id'], ondelete='CASCADE'),
+    sa.PrimaryKeyConstraint('id')
+    )
+    op.create_index(op.f('ix_evaluation_group_created_by'), 'evaluation_group', ['created_by'], unique=False)
+    op.create_index(op.f('ix_evaluation_group_project_id'), 'evaluation_group', ['project_id'], unique=False)
+    op.create_table('evaluation_set',
+    sa.Column('id', postgresql.UUID(as_uuid=True), nullable=False),
+    sa.Column('question', sa.String(), nullable=True),
+    sa.Column('created_at', sa.DateTime(), nullable=True),
+    sa.Column('created_by', postgresql.UUID(as_uuid=True), nullable=True),
+    sa.Column('project_id', postgresql.UUID(as_uuid=True), nullable=True),
+    sa.Column('record_ids', sa.JSON(), nullable=True),
+    sa.ForeignKeyConstraint(['created_by'], ['user.id'], ondelete='SET NULL'),
+    sa.ForeignKeyConstraint(['project_id'], ['project.id'], ondelete='CASCADE'),
+    sa.PrimaryKeyConstraint('id')
+    )
+    op.create_index(op.f('ix_evaluation_set_created_by'), 'evaluation_set', ['created_by'], unique=False)
+    op.create_index(op.f('ix_evaluation_set_project_id'), 'evaluation_set', ['project_id'], unique=False)
+    op.create_table('evaluation_run',
+    sa.Column('id', postgresql.UUID(as_uuid=True), nullable=False),
+    sa.Column('evaluation_group_id', postgresql.UUID(as_uuid=True), nullable=True),
+    sa.Column('created_at', sa.DateTime(), nullable=True),
+    sa.Column('created_by', postgresql.UUID(as_uuid=True), nullable=True),
+    sa.Column('project_id', postgresql.UUID(as_uuid=True), nullable=True),
+    sa.Column('embedding_id', postgresql.UUID(as_uuid=True), nullable=True),
+    sa.Column('state', sa.String(), nullable=True),
+    sa.Column('results', sa.JSON(), nullable=True),
+    sa.Column('meta_info', sa.JSON(), nullable=True),
+    sa.ForeignKeyConstraint(['created_by'], ['user.id'], ondelete='SET NULL'),
+    sa.ForeignKeyConstraint(['embedding_id'], ['embedding.id'], ondelete='SET NULL'),
+    sa.ForeignKeyConstraint(['evaluation_group_id'], ['evaluation_group.id'], ondelete='SET NULL'),
+    sa.ForeignKeyConstraint(['project_id'], ['project.id'], ondelete='CASCADE'),
+    sa.PrimaryKeyConstraint('id')
+    )
+    op.create_index(op.f('ix_evaluation_run_created_by'), 'evaluation_run', ['created_by'], unique=False)
+    op.create_index(op.f('ix_evaluation_run_embedding_id'), 'evaluation_run', ['embedding_id'], unique=False)
+    op.create_index(op.f('ix_evaluation_run_evaluation_group_id'), 'evaluation_run', ['evaluation_group_id'], unique=False)
+    op.create_index(op.f('ix_evaluation_run_project_id'), 'evaluation_run', ['project_id'], unique=False)
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_index(op.f('ix_evaluation_run_project_id'), table_name='evaluation_run')
+    op.drop_index(op.f('ix_evaluation_run_evaluation_group_id'), table_name='evaluation_run')
+    op.drop_index(op.f('ix_evaluation_run_embedding_id'), table_name='evaluation_run')
+    op.drop_index(op.f('ix_evaluation_run_created_by'), table_name='evaluation_run')
+    op.drop_table('evaluation_run')
+    op.drop_index(op.f('ix_evaluation_set_project_id'), table_name='evaluation_set')
+    op.drop_index(op.f('ix_evaluation_set_created_by'), table_name='evaluation_set')
+    op.drop_table('evaluation_set')
+    op.drop_index(op.f('ix_evaluation_group_project_id'), table_name='evaluation_group')
+    op.drop_index(op.f('ix_evaluation_group_created_by'), table_name='evaluation_group')
+    op.drop_table('evaluation_group')
+    # ### end Alembic commands ###
diff --git a/app.py b/app.py
index f152011f..80969c89 100644
--- a/app.py
+++ b/app.py
@@ -30,6 +30,7 @@
 from fast_api.routes.weak_supervision import router as weak_supervision_router
 from fast_api.routes.labeling_tasks import router as labeling_tasks_router
 from fast_api.routes.task_execution import router as task_execution_router
+from fast_api.routes.playground import router as playground_router
 from middleware.database_session import handle_db_session
 from middleware.starlette_tmp_middleware import DatabaseSessionHandler
 from starlette.applications import Starlette
@@ -55,6 +56,7 @@
     PREFIX_WEAK_SUPERVISION,
     PREFIX_LABELING_TASKS,
     PREFIX_TASK_EXECUTION,
+    PREFIX_PLAYGROUND,
 )
 from util import security, clean_up
 from middleware import log_storage
@@ -106,7 +108,9 @@
 fastapi_app.include_router(
     labeling_tasks_router, prefix=PREFIX_LABELING_TASKS, tags=["labeling-tasks"]
 )
-
+fastapi_app.include_router(
+    playground_router, prefix=PREFIX_PLAYGROUND, tags=["playground"]
+)
 fastapi_app_internal = FastAPI()
 fastapi_app_internal.include_router(
     task_execution_router, prefix=PREFIX_TASK_EXECUTION, tags=["task-execution"]
diff --git a/controller/embedding/connector.py b/controller/embedding/connector.py
index 96886321..453d237d 100644
--- a/controller/embedding/connector.py
+++ b/controller/embedding/connector.py
@@ -5,17 +5,17 @@
 from util import service_requests
 import requests
 
-BASE_URI = os.getenv("EMBEDDING_SERVICE")
+EMBEDDING_BASE_URI = os.getenv("EMBEDDING_SERVICE")
 NEURAL_SEARCH_BASE_URI = os.getenv("NEURAL_SEARCH")
 
 
 def request_listing_recommended_encoders() -> Any:
-    url = f"{BASE_URI}/classification/recommend/TEXT"  # TODO does here have to be a data type?
+    url = f"{EMBEDDING_BASE_URI}/classification/recommend/TEXT"
     return service_requests.get_call_or_raise(url)
 
 
 def request_embedding(project_id: str, embedding_id: str) -> Any:
-    url = f"{BASE_URI}/embed"
+    url = f"{EMBEDDING_BASE_URI}/embed"
     data = {
         "project_id": str(project_id),
         "embedding_id": str(embedding_id),
@@ -24,12 +24,12 @@ def request_embedding(project_id: str, embedding_id: str) -> Any:
 
 
 def request_deleting_embedding(project_id: str, embedding_id: str) -> Any:
-    url = f"{BASE_URI}/delete/{project_id}/{embedding_id}"
+    url = f"{EMBEDDING_BASE_URI}/delete/{project_id}/{embedding_id}"
     return service_requests.delete_call_or_raise(url)
 
 
 def request_tensor_upload(project_id: str, embedding_id: str) -> None:
-    url = f"{BASE_URI}/upload_tensor_data/{project_id}/{embedding_id}"
+    url = f"{EMBEDDING_BASE_URI}/upload_tensor_data/{project_id}/{embedding_id}"
     service_requests.post_call_or_raise(url, {})
 
 
@@ -39,7 +39,7 @@ def request_re_embed_records(
     # example changes structure:
     # {"<embedding_id>":[{"record_id":"<record_id>","attribute_name":"<attribute_name>","sub_key":"<sub_key>"}]}
     # note that sub_key is optional and only for embedding lists relevant
-    url = f"{BASE_URI}/re_embed_records/{project_id}"
+    url = f"{EMBEDDING_BASE_URI}/re_embed_records/{project_id}"
     service_requests.post_call_or_raise(url, {"changes": changes})
 
 
@@ -96,3 +96,37 @@ def delete_embedding_from_neural_search(embedding_id: str) -> None:
     url = f"{NEURAL_SEARCH_BASE_URI}/delete_collection"
     params = {"embedding_id": embedding_id}
     requests.put(url, params=params)
+
+
+def request_tensor_for_text(
+    refinery_project_id: str, embedding_id: str, texts: List[str]
+) -> Any:
+    url = (
+        f"{EMBEDDING_BASE_URI}/calc-tensor-by-pkl/{refinery_project_id}/{embedding_id}"
+    )
+    data = {
+        "texts": texts,
+    }
+    return service_requests.post_call_or_raise(url, data)
+
+
+def request_most_similar_records(
+    project_id: str,
+    embedding_id: str,
+    embedding_tensor: List[float],
+    limit: int,
+    similarity_filter_option: Optional[List[Dict[str, Any]]] = None,
+    threshold: Optional[float] = None,
+    question: Optional[str] = None,
+) -> Any:
+    url = f"{NEURAL_SEARCH_BASE_URI}/most_similar_by_embedding?include_scores=true"
+    data = {
+        "project_id": project_id,
+        "embedding_id": embedding_id,
+        "embedding_tensor": embedding_tensor,
+        "limit": limit,
+        "att_filter": similarity_filter_option,
+        "threshold": threshold,
+        "question": question,
+    }
+    return service_requests.post_call_or_raise(url, data)
diff --git a/controller/playground/__init__.py b/controller/playground/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/controller/playground/manager.py b/controller/playground/manager.py
new file mode 100644
index 00000000..468ba327
--- /dev/null
+++ b/controller/playground/manager.py
@@ -0,0 +1,302 @@
+import traceback
+from typing import List, Any, Optional, Tuple, Dict
+import os
+from controller.embedding.connector import (
+    request_most_similar_records,
+    request_tensor_for_text,
+)
+from submodules.model.business_objects import (
+    evaluation_group as evaluation_group_db_bo,
+    evaluation_set as evaluation_set_db_bo,
+    evaluation_run as evaluation_run_db_bo,
+    playground_question as playground_question_db_bo,
+    attribute as attribute_db_bo,
+    record as record_db_bo,
+)
+from service.search.search import resolve_extended_search
+from submodules.model.enums import EvaluationRunState
+from submodules.model.util import sql_alchemy_to_dict, to_frontend_obj_raw
+from concurrent.futures import ThreadPoolExecutor
+from .reformulation import reformulate_question
+import json
+
+NEURAL_SEARCH = os.getenv("NEURAL_SEARCH")
+EMBEDDING_SERVICE = os.getenv("EMBEDDING_SERVICE")
+
+EVALUATION_RUN_LIMIT_DEFAULT = 100
+
+
+def get_search_result_for_text(
+    project_id: str,
+    embedding_id: str,
+    question: str,
+    limit: int,
+    filter=None,
+    threshold=None,
+):
+    question_tensor = __get_tensors_for_texts(project_id, embedding_id, [question])
+    if not question_tensor or not question_tensor[0]:
+        return []
+
+    records = __get_most_similar_records(
+        project_id,
+        embedding_id,
+        question_tensor[0],
+        limit,
+        filter,
+        threshold,
+        question,
+    )
+
+    record_ids = [record["id"] for record in records]
+    record_obj_map = record_db_bo.get_full_record_data_for_id_group(
+        project_id, record_ids
+    )
+
+    return [
+        {
+            "data": record_obj_map.get(record["id"]),
+            "id": record["id"],
+            "score": record["score"],
+        }
+        for record in records
+    ]
+
+
+def create_evaluation_set(
+    project_id: str, question: str, record_ids: List[str], created_by: str
+):
+    evaluation_set_db_bo.create(project_id, question, created_by, record_ids, True)
+
+
+def delete_evaluation_sets(project_id: str, set_ids: str):
+    evaluation_set_db_bo.delete_all(project_id, set_ids, True)
+
+
+def get_evaluation_set_by_id(project_id: str, set_id: str):
+    return evaluation_set_db_bo.get(project_id, set_id)
+
+
+def get_evaluation_sets(project_id: str):
+    return evaluation_set_db_bo.get_all(project_id)
+
+
+def get_evaluation_sets_by_group_id(project_id: str, evaluation_group_id: str):
+    return evaluation_set_db_bo.get_by_evaluation_group_id(
+        project_id, evaluation_group_id
+    )
+
+
+def create_evaluation_group(
+    project_id: str, name: str, evaluation_set_ids: List[str], created_by: str
+):
+    evaluation_group_db_bo.create(
+        project_id, name, created_by, evaluation_set_ids, True
+    )
+
+
+def delete_evaluation_groups(project_id: str, group_ids: str):
+    evaluation_group_db_bo.delete_all(project_id, group_ids, True)
+
+
+def get_evaluation_group_by_id(project_id: str, group_id: str):
+    return evaluation_group_db_bo.get(project_id, group_id)
+
+
+def get_evaluation_groups(project_id: str):
+    return evaluation_group_db_bo.get_all(project_id)
+
+
+def delete_evaluation_runs(project_id: str, run_ids: str):
+    return evaluation_run_db_bo.delete_all(project_id, run_ids, True)
+
+
+def get_evaluation_runs(project_id: str):
+    evaluation_runs_objects = evaluation_run_db_bo.get_all(project_id)
+    return [__pack_evaluation_run(run) for run in evaluation_runs_objects]
+
+
+def get_evaluation_run_by_id(project_id: str, run_id: str):
+    evaluation_run_object = evaluation_run_db_bo.get(project_id, run_id)
+    return __pack_evaluation_run(evaluation_run_object)
+
+
+def init_evaluation_run(
+    project_id: str,
+    embedding_id: str,
+    evaluation_group_id: str,
+    created_by: str,
+    threshold: float,
+):
+    evaluation_run = evaluation_run_db_bo.create(
+        project_id,
+        evaluation_group_id,
+        created_by,
+        embedding_id,
+        EvaluationRunState.RUNNING.value,
+    )
+    evaluation_results = []
+    try:
+        evaluation_sets = evaluation_set_db_bo.get_by_evaluation_group_id(
+            project_id, evaluation_group_id
+        )
+        questions_tensors = __get_tensors_for_texts(
+            project_id,
+            embedding_id,
+            [evaluation_set.question for evaluation_set in evaluation_sets],
+        )
+
+        with ThreadPoolExecutor(max_workers=min(10, len(evaluation_sets))) as executor:
+            futures = [
+                executor.submit(
+                    __get_most_similar_records,
+                    project_id,
+                    embedding_id,
+                    questions_tensors[index],
+                    EVALUATION_RUN_LIMIT_DEFAULT,
+                    None,
+                    threshold,
+                )
+                for index, evaluation_set in enumerate(evaluation_sets)
+            ]
+
+            search_results = [future.result() for future in futures]
+
+        all_record_ids = set()
+        for evaluation_set, search_result in zip(evaluation_sets, search_results):
+            expected_record_ids = {str(rid) for rid in evaluation_set.record_ids}
+            received_record_ids = {str(record["id"]) for record in search_result}
+            all_record_ids.update(expected_record_ids | received_record_ids)
+
+        all_records = record_db_bo.get_by_record_ids(project_id, list(all_record_ids))
+        record_map = {
+            str(record.id): sql_alchemy_to_dict(record) for record in all_records
+        }
+
+        for evaluation_set, search_result in zip(evaluation_sets, search_results):
+            expected_record_ids = {str(rid) for rid in evaluation_set.record_ids}
+            received_record_ids = {str(record["id"]) for record in search_result}
+
+            true_positives = expected_record_ids & received_record_ids
+            false_negatives = expected_record_ids - received_record_ids
+            false_positives = received_record_ids - expected_record_ids
+
+            result = {
+                "evaluation_set_id": str(evaluation_set.id),
+                "true_positives": to_frontend_obj_raw(
+                    [record_map[record_id] for record_id in true_positives]
+                ),
+                "false_positives": to_frontend_obj_raw(
+                    [record_map[record_id] for record_id in false_positives]
+                ),
+                "false_negatives": to_frontend_obj_raw(
+                    [record_map[record_id] for record_id in false_negatives]
+                ),
+            }
+            evaluation_results.append(result)
+        state = EvaluationRunState.SUCCESS.value
+    except Exception:
+        traceback.print_exc()
+        state = EvaluationRunState.FAILED.value
+    evaluation_run_db_bo.update(
+        project_id, evaluation_run.id, state, evaluation_results, None, True
+    )
+    return evaluation_run
+
+
+def __get_tensors_for_texts(
+    refinery_project_id: str, embedding_id: str, texts: List[str]
+) -> Tuple[bool, Optional[List[Any]]]:
+    obj = request_tensor_for_text(refinery_project_id, embedding_id, texts)
+    return obj.get("tensor", None)
+
+
+def __get_most_similar_records(
+    project_id: str,
+    embedding_id: str,
+    embedding_tensor: List[float],
+    limit: int,
+    similarity_filter_option: Optional[List[Dict[str, Any]]] = None,
+    threshold: Optional[float] = None,
+    question: Optional[str] = None,
+):
+    return request_most_similar_records(
+        project_id,
+        embedding_id,
+        embedding_tensor,
+        limit,
+        similarity_filter_option,
+        threshold,
+        question,
+    )
+
+
+def get_records_by_content(
+    project_id: str, user_id: str, content: str, limit: int, offset: int
+):
+
+    filter_data = __build_contains_filter(project_id, content)
+    record_list = resolve_extended_search(
+        project_id, user_id, filter_data, limit, offset
+    ).record_list
+    record_list = to_frontend_obj_raw(
+        sql_alchemy_to_dict(record_list, column_blacklist=["rla_data"])
+    )
+    return record_list
+
+
+def __build_contains_filter(project_id: str, content: str):
+    attributes = attribute_db_bo.get_all(project_id)
+
+    filter_each_attribute = [
+        {
+            "RELATION": "OR",
+            "NEGATION": False,
+            "TARGET_TABLE": "RECORD",
+            "TARGET_COLUMN": "DATA",
+            "OPERATOR": "CONTAINS",
+            "VALUES": [f"{attribute.name}", f"{content}"],
+        }
+        for attribute in attributes
+    ]
+
+    if len(filter_each_attribute) > 0:
+        filter_each_attribute[0]["RELATION"] = "NONE"
+
+    final_filter = [
+        {
+            "RELATION": "NONE",
+            "NEGATION": False,
+            "TARGET_TABLE": "RECORD",
+            "TARGET_COLUMN": "CATEGORY",
+            "OPERATOR": "EQUAL",
+            "VALUES": ["SCALE"],
+        },
+        {"RELATION": "AND", "NEGATION": False, "FILTER": filter_each_attribute},
+    ]
+
+    return final_filter
+
+
+def __pack_evaluation_run(evaluation_run):
+    return {
+        **sql_alchemy_to_dict(evaluation_run, True),
+        "results": to_frontend_obj_raw(evaluation_run.results),
+    }
+
+
+def get_question_reformulation(question: str, api_key: str) -> Optional[Dict]:
+    q_reformulated = reformulate_question(question, api_key)
+    try:
+        reformulation_dict = json.loads(q_reformulated)
+        return reformulation_dict
+    except Exception:
+        return None
+
+
+def get_playground_questions(project_id: str):
+    return playground_question_db_bo.get_all(project_id)
+
+
+def delete_playground_question(project_id: str, question_id: str):
+    playground_question_db_bo.delete(project_id, question_id)
diff --git a/controller/playground/reformulation.py b/controller/playground/reformulation.py
new file mode 100644
index 00000000..0c124d90
--- /dev/null
+++ b/controller/playground/reformulation.py
@@ -0,0 +1,78 @@
+import traceback
+from openai import OpenAI
+from openai.types.chat import ChatCompletion, ChatCompletionChunk
+from typing import Union, Optional
+
+REFORMULATION_PROMPT = """Generate a refined and optimized reformulation of a given question, ensuring it aligns better with the user's search intent and maximizes result relevance for RAG. The reformulated question should maintain the original meaning while improving clarity, specificity, and contextual precision.
+
+# Important Guidelines
+
+- **Enhanced Clarity**: Improve sentence structure and wording for better readability.
+- **Intent Alignment**: Ensure the reformulated question accurately reflects the user's underlying intent.
+- **Keyword Optimization**: Retain and enhance relevant keywords for improved search efficiency.
+- **Context Preservation**: Maintain all necessary contextual details while removing ambiguity.
+- **Concise & Precise**: Avoid unnecessary words while keeping the question well-structured.
+- **Neutral Tone**: Ensure the tone remains neutral and professional.
+- **Variant Suggestions**: If applicable, suggest alternative phrasings for different perspectives.
+- **Avoid Unnecessary Expansions**: Do not introduce irrelevant details that alter the original intent.
+- **Question Type Awareness**: Recognize if the question seeks factual, comparative, or exploratory information and optimize accordingly.
+- **No Change in Core Meaning**: Ensure the fundamental intent and topic of the question remain intact.
+
+# Steps
+
+1. Analyze the original question to identify key intent and meaning.
+2. Reformulate it with improved clarity, precision, and structure.
+3. Optimize for search relevance while preserving the original inquiry’s core message.
+4. Provide alternative phrasings if they offer significant improvement.
+5. Ensure the final output is concise, neutral, and intent-aligned.
+
+# Output Format
+Only the pure valid json with key "reformulation" and value as the improved version of the original question.
+
+{
+  "reformulation": "<The improved version of the original question>",
+}
+
+# Notes
+
+A poorly reformulated question may lead to irrelevant or misleading results. It is essential to preserve intent while enhancing clarity and relevance."""
+
+
+def reformulate_question(question: str, api_key: str) -> Optional[str]:
+    try:
+        openai_client = OpenAI(api_key=api_key)
+        messages = [
+            {"role": "system", "content": REFORMULATION_PROMPT},
+            {"role": "user", "content": f"Question: {question}"},
+        ]
+        completion = openai_client.chat.completions.create(
+            model="gpt-4o-mini", messages=messages, stream=False
+        )
+        return __get_openai_value_from(completion)
+    except Exception:
+        traceback.print_exc()
+        return None
+
+
+def __get_openai_value_from(
+    open_ai_obj: Union[ChatCompletion, ChatCompletionChunk], raise_me: bool = True
+) -> str:
+    if not open_ai_obj:
+        return ""
+    if isinstance(open_ai_obj, ChatCompletion) or isinstance(
+        open_ai_obj, ChatCompletionChunk
+    ):
+        if hasattr(open_ai_obj, "choices") and len(open_ai_obj.choices) > 0:
+            t = open_ai_obj.choices[0]
+            if isinstance(open_ai_obj, ChatCompletion):
+                if hasattr(t, "message") and hasattr(t.message, "content"):
+                    return t.message.content or ""
+            elif isinstance(open_ai_obj, ChatCompletionChunk):
+                if hasattr(t, "delta") and hasattr(t.delta, "content"):
+                    return t.delta.content or ""
+    else:
+        raise ValueError("Unknown open_ai_obj:" + type(open_ai_obj))
+    ## if we reach this point, we couldn't access the value
+    if raise_me:
+        raise ValueError("Couldn't access value from", open_ai_obj)
+    return ""
diff --git a/controller/record/manager.py b/controller/record/manager.py
index a02a7704..ad15a0e6 100644
--- a/controller/record/manager.py
+++ b/controller/record/manager.py
@@ -30,6 +30,10 @@ def get_record(project_id: str, record_id: str) -> Record:
     return record.get(project_id, record_id)
 
 
+def get_record_by_ids(project_id: str, record_ids: List[str]) -> List[Record]:
+    return record.get_by_record_ids(project_id, record_ids)
+
+
 def get_records_by_similarity_search(
     project_id: str,
     user_id: str,
diff --git a/fast_api/models.py b/fast_api/models.py
index ac790911..51fd812c 100644
--- a/fast_api/models.py
+++ b/fast_api/models.py
@@ -444,3 +444,54 @@ class UpdateCustomerButton(BaseModel):
 
 class MissingUsersBody(BaseModel):
     user_ids: List[StrictStr]
+
+
+class SearchQuestionBody(BaseModel):
+    question: StrictStr
+    embeddingId: StrictStr
+    limit: Optional[StrictInt] = None
+    filter: Optional[List[Dict]] = None
+    threshold: Optional[StrictFloat] = None
+
+
+class EvaluationSetCreationBody(BaseModel):
+    question: StrictStr
+    recordIds: List[StrictStr]
+
+
+class EvaluationSetDeletionBody(BaseModel):
+    evaluationSetIds: List[StrictStr]
+
+
+class EvaluationGroupCreationBody(BaseModel):
+    evaluationSetIds: List[StrictStr]
+    name: StrictStr
+
+
+class EvaluationGroupDeletionBody(BaseModel):
+    evaluationGroupIds: List[StrictStr]
+
+
+class EvaluationRunCreationBody(BaseModel):
+    embeddingId: StrictStr
+    evaluationGroupId: StrictStr
+    threshold: StrictFloat
+
+
+class RecordSearchContains(BaseModel):
+    query: StrictStr
+    offset: StrictInt
+    limit: StrictInt
+
+
+class RecordsBatchBody(BaseModel):
+    record_ids: List[StrictStr]
+
+
+class EvaluationRunDeletionBody(BaseModel):
+    evaluationRunIds: List[StrictStr]
+
+
+class SearchQuestionReformulationBody(BaseModel):
+    question: StrictStr
+    apiKey: StrictStr
diff --git a/fast_api/routes/playground.py b/fast_api/routes/playground.py
new file mode 100644
index 00000000..7bb16d5e
--- /dev/null
+++ b/fast_api/routes/playground.py
@@ -0,0 +1,285 @@
+from fastapi import APIRouter, Depends, Request, Body
+from controller.auth import manager as auth_manager
+from fast_api.routes.client_response import (
+    pack_json_result,
+    get_silent_success,
+)
+from fast_api.models import (
+    EvaluationRunDeletionBody,
+    SearchQuestionBody,
+    SearchQuestionReformulationBody,
+    EvaluationSetCreationBody,
+    EvaluationGroupCreationBody,
+    EvaluationRunCreationBody,
+    RecordSearchContains,
+    EvaluationSetDeletionBody,
+    EvaluationGroupDeletionBody,
+)
+from controller.playground import manager as playground_manager
+
+router = APIRouter()
+
+
+@router.post(
+    "/{project_id}/search",
+    dependencies=[Depends(auth_manager.check_project_access_dep)],
+)
+def get_search_results_question(
+    request: Request,
+    project_id: str,
+    search_question: SearchQuestionBody = Body(...),
+):
+    search_results = playground_manager.get_search_result_for_text(
+        project_id,
+        search_question.embeddingId,
+        search_question.question,
+        search_question.limit,
+        search_question.filter,
+        search_question.threshold,
+    )
+
+    return pack_json_result(search_results, wrap_for_frontend=False)
+
+
+@router.get(
+    "/{project_id}/playground-questions",
+    dependencies=[Depends(auth_manager.check_project_access_dep)],
+)
+def get_playground_questions(
+    request: Request,
+    project_id: str,
+):
+    questions = playground_manager.get_playground_questions(project_id)
+    return pack_json_result(questions)
+
+
+@router.post(
+    "/{project_id}/evaluation-sets",
+    dependencies=[Depends(auth_manager.check_project_access_dep)],
+)
+def create_evaluation_set(
+    request: Request,
+    project_id: str,
+    evaluation_set: EvaluationSetCreationBody = Body(...),
+):
+    user_id = auth_manager.get_user_id_by_info(request.state.info)
+    playground_manager.create_evaluation_set(
+        project_id,
+        evaluation_set.question,
+        evaluation_set.recordIds,
+        user_id,
+    )
+    return get_silent_success()
+
+
+@router.delete(
+    "/{project_id}/evaluation-sets",
+    dependencies=[Depends(auth_manager.check_project_access_dep)],
+)
+def delete_evaluation_set(
+    request: Request,
+    project_id: str,
+    evaluation_set: EvaluationSetDeletionBody = Body(...),
+):
+    playground_manager.delete_evaluation_sets(
+        project_id, evaluation_set.evaluationSetIds
+    )
+    return get_silent_success()
+
+
+@router.get(
+    "/{project_id}/evaluation-sets",
+    dependencies=[Depends(auth_manager.check_project_access_dep)],
+)
+def get_evaluation_sets(
+    request: Request,
+    project_id: str,
+):
+    matching_sets = playground_manager.get_evaluation_sets(project_id)
+    return pack_json_result(matching_sets)
+
+
+@router.get(
+    "/{project_id}/evaluation-sets/{set_id}",
+    dependencies=[Depends(auth_manager.check_project_access_dep)],
+)
+def get_single_evaluation_set(
+    request: Request,
+    project_id: str,
+    set_id: str,
+):
+    matching_set = playground_manager.get_evaluation_set_by_id(project_id, set_id)
+    return pack_json_result(matching_set)
+
+
+@router.get(
+    "/{project_id}/evaluation-sets-by-group/{evaluation_group_id}",
+    dependencies=[Depends(auth_manager.check_project_access_dep)],
+)
+def get_evaluation_sets_batch(
+    request: Request,
+    project_id: str,
+    evaluation_group_id: str,
+):
+    evaluation_sets = playground_manager.get_evaluation_sets_by_group_id(
+        project_id, evaluation_group_id
+    )
+
+    return pack_json_result(evaluation_sets)
+
+
+@router.post(
+    "/{project_id}/evaluation-groups",
+    dependencies=[Depends(auth_manager.check_project_access_dep)],
+)
+def create_evaluation_group(
+    request: Request,
+    project_id: str,
+    evaluation_group: EvaluationGroupCreationBody = Body(...),
+):
+    user_id = auth_manager.get_user_id_by_info(request.state.info)
+    playground_manager.create_evaluation_group(
+        project_id,
+        evaluation_group.name,
+        evaluation_group.evaluationSetIds,
+        user_id,
+    )
+    return get_silent_success()
+
+
+@router.delete(
+    "/{project_id}/evaluation-groups",
+    dependencies=[Depends(auth_manager.check_project_access_dep)],
+)
+def delete_evaluation_group(
+    request: Request,
+    project_id: str,
+    evaluation_group: EvaluationGroupDeletionBody = Body(...),
+):
+    playground_manager.delete_evaluation_groups(
+        project_id, evaluation_group.evaluationGroupIds
+    )
+    return get_silent_success()
+
+
+@router.get(
+    "/{project_id}/evaluation-groups",
+    dependencies=[Depends(auth_manager.check_project_access_dep)],
+)
+def get_evaluation_groups(request: Request, project_id: str):
+    evaluation_groups = playground_manager.get_evaluation_groups(project_id)
+    return pack_json_result(evaluation_groups)
+
+
+@router.get("/{project_id}/evaluation-groups/{group_id}")
+def get_single_evaluation_group(
+    request: Request,
+    project_id: str,
+    group_id: str,
+):
+    evaluation_group = playground_manager.get_evaluation_group_by_id(
+        project_id, group_id
+    )
+    return pack_json_result(evaluation_group)
+
+
+@router.get(
+    "/{project_id}/evaluation-runs",
+    dependencies=[Depends(auth_manager.check_project_access_dep)],
+)
+def get_evaluation_runs(request: Request, project_id: str):
+    evaluation_runs = playground_manager.get_evaluation_runs(project_id)
+    return pack_json_result(evaluation_runs, wrap_for_frontend=False)
+
+
+@router.get("/{project_id}/evaluation-runs/{run_id}")
+def get_single_evaluation_run(
+    request: Request,
+    project_id: str,
+    run_id: str,
+):
+    evaluation_run = playground_manager.get_evaluation_run_by_id(project_id, run_id)
+    return pack_json_result(evaluation_run, wrap_for_frontend=False)
+
+
+@router.post(
+    "/{project_id}/evaluation-runs",
+    dependencies=[Depends(auth_manager.check_project_access_dep)],
+)
+def create_evaluation_run(
+    request: Request,
+    project_id: str,
+    evaluation_run: EvaluationRunCreationBody = Body(...),
+):
+    user_id = auth_manager.get_user_id_by_info(request.state.info)
+    playground_manager.init_evaluation_run(
+        project_id,
+        evaluation_run.embeddingId,
+        evaluation_run.evaluationGroupId,
+        user_id,
+        evaluation_run.threshold,
+    )
+    return get_silent_success()
+
+
+@router.post(
+    "/{project_id}/record-search-contains",
+    dependencies=[Depends(auth_manager.check_project_access_dep)],
+)
+def get_record_by_content(
+    request: Request,
+    project_id: str,
+    record_search: RecordSearchContains = Body(...),
+):
+    user_id = auth_manager.get_user_id_by_info(request.state.info)
+    records = playground_manager.get_records_by_content(
+        project_id,
+        user_id,
+        record_search.query,
+        record_search.limit,
+        record_search.offset,
+    )
+    return pack_json_result(records, wrap_for_frontend=False)
+
+
+@router.delete(
+    "/{project_id}/evaluation-runs",
+    dependencies=[Depends(auth_manager.check_project_access_dep)],
+)
+def delete_evaluation_run(
+    request: Request,
+    project_id: str,
+    evaluation_set: EvaluationRunDeletionBody = Body(...),
+):
+    playground_manager.delete_evaluation_runs(
+        project_id, evaluation_set.evaluationRunIds
+    )
+    return get_silent_success()
+
+
+@router.post(
+    "/{project_id}/reformulation",
+    dependencies=[Depends(auth_manager.check_project_access_dep)],
+)
+def get_question_reformulation(
+    request: Request,
+    project_id: str,
+    question_reformulation: SearchQuestionReformulationBody = Body(...),
+):
+    reformulation = playground_manager.get_question_reformulation(
+        question_reformulation.question, question_reformulation.apiKey
+    )
+    return pack_json_result(reformulation)
+
+
+@router.delete(
+    "/{project_id}/playground-questions/{question_id}",
+    dependencies=[Depends(auth_manager.check_project_access_dep)],
+)
+def delete_playground_question(
+    request: Request,
+    project_id: str,
+    question_id: str,
+):
+    playground_manager.delete_playground_question(project_id, question_id)
+    return get_silent_success()
diff --git a/fast_api/routes/project_setting.py b/fast_api/routes/project_setting.py
index 1d7b5144..fd8774de 100644
--- a/fast_api/routes/project_setting.py
+++ b/fast_api/routes/project_setting.py
@@ -3,6 +3,7 @@
     CreateNewAttributeBody,
     PrepareProjectExportBody,
     PrepareRecordExportBody,
+    RecordsBatchBody,
     UpdateAttributeBody,
 )
 from fastapi import APIRouter, Body, Depends, Request
@@ -148,6 +149,26 @@ def get_record_by_record_id(
     return pack_json_result(data)
 
 
+@router.post(
+    "/{project_id}/records-batch",
+    dependencies=[Depends(auth_manager.check_project_access_dep)],
+)
+def get_records_batch(
+    project_id: str,
+    recordsBatchBody: RecordsBatchBody = Body(...),
+):
+    records = record_manager.get_record_by_ids(project_id, recordsBatchBody.record_ids)
+    results = [
+        {
+            "id": str(record.id),
+            "data": json.dumps(record.data),
+        }
+        for record in records
+    ]
+
+    return pack_json_result(results)
+
+
 @router.get(
     "/{project_id}/project-size",
     dependencies=[Depends(auth_manager.check_project_access_dep)],
diff --git a/requirements.txt b/requirements.txt
index df8f6eb6..06d107b9 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,6 +13,8 @@ annotated-types==0.7.0
 anyio==4.6.2.post1
     # via
     #   -r requirements/common-requirements.txt
+    #   httpx
+    #   openai
     #   starlette
 blis==0.7.11
     # via thinc
@@ -31,6 +33,8 @@ catalogue==2.0.10
 certifi==2024.8.30
     # via
     #   -r requirements/common-requirements.txt
+    #   httpcore
+    #   httpx
     #   minio
     #   requests
 charset-normalizer==3.4.0
@@ -53,6 +57,8 @@ cymem==2.0.8
     #   preshed
     #   spacy
     #   thinc
+distro==1.9.0
+    # via openai
 docker==5.0.0
     # via -r requirements/requirements.in
 et-xmlfile==1.1.0
@@ -63,14 +69,24 @@ exceptiongroup==1.2.2
     #   anyio
 fastapi==0.115.2
     # via -r requirements/common-requirements.txt
+greenlet==3.1.1
+    # via sqlalchemy
 h11==0.14.0
     # via
     #   -r requirements/common-requirements.txt
+    #   httpcore
     #   uvicorn
+httpcore==1.0.7
+    # via httpx
+httpx==0.27.2
+    # via
+    #   -r requirements/requirements.in
+    #   openai
 idna==3.10
     # via
     #   -r requirements/common-requirements.txt
     #   anyio
+    #   httpx
     #   requests
 jinja2==3.1.4
     # via spacy
@@ -92,7 +108,7 @@ markupsafe==2.1.5
     #   jinja2
     #   mako
 minio==7.1.12
-    # via -r requirements/common-requirements.txt
+    # via -r DRF
 murmurhash==1.0.10
     # via
     #   preshed
@@ -105,6 +121,8 @@ numpy==1.23.4
     #   pandas
     #   spacy
     #   thinc
+openai==1.31.0
+    # via -r requirements/requirements.in
 openpyxl==3.0.10
     # via -r requirements/requirements.in
 packaging==24.0
@@ -128,6 +146,7 @@ pydantic==2.7.4
     #   -r requirements/requirements.in
     #   confection
     #   fastapi
+    #   openai
     #   spacy
     #   thinc
     #   weasel
@@ -170,6 +189,8 @@ sniffio==1.3.1
     # via
     #   -r requirements/common-requirements.txt
     #   anyio
+    #   httpx
+    #   openai
 spacy[ja]==3.7.5
     # via -r requirements/requirements.in
 spacy-legacy==3.0.12
@@ -199,7 +220,9 @@ sudachipy==0.6.8
 thinc==8.2.5
     # via spacy
 tqdm==4.66.4
-    # via spacy
+    # via
+    #   openai
+    #   spacy
 typer==0.4.2
     # via
     #   spacy
@@ -210,6 +233,7 @@ typing-extensions==4.12.2
     #   anyio
     #   cloudpathlib
     #   fastapi
+    #   openai
     #   pydantic
     #   pydantic-core
     #   starlette
diff --git a/requirements/requirements.in b/requirements/requirements.in
index efc31e82..e98c34e4 100644
--- a/requirements/requirements.in
+++ b/requirements/requirements.in
@@ -9,4 +9,6 @@ pyjwt==2.4.0
 spacy[ja]==3.7.5
 pyminizip==0.2.6
 rncryptor==3.3.0
-pydantic==2.7.4
\ No newline at end of file
+pydantic==2.7.4
+httpx==0.27.2
+openai==1.31.0
\ No newline at end of file
diff --git a/route_prefix.py b/route_prefix.py
index b519c320..5c511fbe 100644
--- a/route_prefix.py
+++ b/route_prefix.py
@@ -16,3 +16,4 @@
 PREFIX_WEAK_SUPERVISION = PREFIX + "/weak-supervision"
 PREFIX_LABELING_TASKS = PREFIX + "/labeling-tasks"
 PREFIX_TASK_EXECUTION = PREFIX + "/task-execution"
+PREFIX_PLAYGROUND = PREFIX + "/playground"
diff --git a/submodules/model b/submodules/model
index cf337c2d..8e59d8ef 160000
--- a/submodules/model
+++ b/submodules/model
@@ -1 +1 @@
-Subproject commit cf337c2d46a302f33ed35419c1d95a60871bbe61
+Subproject commit 8e59d8ef75b0ade84d1ef4d0b7e957eb324c2bc8