Skip to content

Commit 71c6918

Browse files
docs: hello world for ragas experimental (#2100)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
1 parent 356d6bf commit 71c6918

File tree

3 files changed

+261
-179
lines changed

3 files changed

+261
-179
lines changed

docs/experimental/index.md

Lines changed: 83 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,85 @@
11
# Ragas Experimental
22

3-
Under the works but stay tuned :)
3+
## Hello World 👋
4+
5+
1. Set up a sample experiment.
6+
7+
```
8+
ragas hello-world
9+
```
10+
11+
2. Run your first experiment with Ragas CLI.
12+
13+
```
14+
ragas evals hello_world/evals.py --dataset test_data --metrics accuracy --name first_experiment
15+
```
16+
17+
```
18+
Running evaluation: hello_world/evals.py
19+
Dataset: test_data
20+
Getting dataset: test_data
21+
✓ Loaded dataset with 10 rows
22+
Running experiment: 100%|████████████████████████████████████████████████| 20/20 [00:00<00:00, 4872.00it/s]
23+
✓ Completed experiments successfully
24+
╭────────────────────────── Ragas Evaluation Results ──────────────────────────╮
25+
│ Experiment: first_experiment │
26+
│ Dataset: test_data (10 rows) │
27+
╰──────────────────────────────────────────────────────────────────────────────╯
28+
Numerical Metrics
29+
┏━━━━━━━━━━┳━━━━━━━━━┓
30+
┃ Metric ┃ Current ┃
31+
┡━━━━━━━━━━╇━━━━━━━━━┩
32+
│ accuracy │ 0.100 │
33+
└──────────┴─────────┘
34+
✓ Experiment results displayed
35+
✓ Evaluation completed successfully
36+
```
37+
38+
3. Inspect the results.
39+
40+
```
41+
tree hello_world/experiments
42+
```
43+
44+
```
45+
hello_world/experiments
46+
└── first_experiment.csv
47+
48+
0 directories, 1 files
49+
```
50+
51+
4. View the results in a spreadsheet application.
52+
53+
```
54+
open hello_world/experiments/first_experiment.csv
55+
```
56+
57+
5. Run your second experiment and compare with the first one.
58+
59+
```
60+
ragas evals hello_world/evals.py --dataset test_data --metrics accuracy --baseline first_experiment
61+
```
62+
63+
```
64+
Running evaluation: hello_world/evals.py
65+
Dataset: test_data
66+
Baseline: first_experiment
67+
Getting dataset: test_data
68+
✓ Loaded dataset with 10 rows
69+
Running experiment: 100%|█████████████████████████████| 20/20 [00:00<00:00, 4900.46it/s]
70+
✓ Completed experiments successfully
71+
Comparing against baseline: first_experiment
72+
╭────────────────────────── Ragas Evaluation Results ──────────────────────────╮
73+
│ Experiment: vigilant_brin │
74+
│ Dataset: test_data (10 rows) │
75+
│ Baseline: first_experiment │
76+
╰──────────────────────────────────────────────────────────────────────────────╯
77+
Numerical Metrics
78+
┏━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━┳━━━━━━┓
79+
┃ Metric ┃ Current ┃ Baseline ┃ Delta ┃ Gate ┃
80+
┡━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━╇━━━━━━┩
81+
│ accuracy │ 0.000 │ 0.000 │ ▼0.000 │ pass │
82+
└──────────┴─────────┴──────────┴────────┴──────┘
83+
✓ Comparison completed
84+
✓ Evaluation completed successfully
85+
```

experimental/ragas_experimental/cli.py

Lines changed: 178 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
from rich.table import Table
1414
from rich.text import Text
1515
from rich.panel import Panel
16+
from rich.spinner import Spinner
17+
from rich.live import Live
1618
from .project.core import Project
1719
from .utils import console
1820

@@ -265,6 +267,7 @@ async def run_experiments(
265267
input_data_class: type,
266268
baseline_name: Optional[str] = None,
267269
metrics: str = None,
270+
name: Optional[str] = None,
268271
):
269272
"""Run experiments using ragas dataset system."""
270273
console.print(f"Getting dataset: {dataset_name}")
@@ -280,7 +283,7 @@ async def run_experiments(
280283

281284
# Run the experiment using the run_async method
282285
try:
283-
experiment_result = await experiment_func.run_async(dataset)
286+
experiment_result = await experiment_func.run_async(dataset, name=name)
284287
success("✓ Completed experiments successfully")
285288
except Exception as e:
286289
error(f"Error running experiments: {e}")
@@ -373,6 +376,9 @@ def evals(
373376
baseline: Optional[str] = typer.Option(
374377
None, "--baseline", help="Baseline experiment name to compare against"
375378
),
379+
name: Optional[str] = typer.Option(
380+
None, "--name", help="Name of the experiment run"
381+
),
376382
):
377383
"""Run evaluations on a dataset."""
378384
console.print(f"Running evaluation: {eval_file}")
@@ -428,7 +434,13 @@ def evals(
428434
# Run the experiments
429435
asyncio.run(
430436
run_experiments(
431-
project, experiment_func, dataset, input_data_class, baseline, metrics
437+
project,
438+
experiment_func,
439+
dataset,
440+
input_data_class,
441+
baseline,
442+
metrics,
443+
name,
432444
)
433445
)
434446
success("✓ Evaluation completed successfully")
@@ -439,5 +451,169 @@ def evals(
439451
raise typer.Exit(1)
440452

441453

454+
@app.command()
def hello_world(
    directory: Optional[str] = typer.Argument(
        ".", help="Directory to run the hello world example in"
    ),
):
    # The form feed below keeps the Args/Raises sections out of the
    # `--help` output; only the text before it is shown to CLI users.
    """Create a ready-to-run hello world example project.

    Writes a `hello_world` folder containing a sample dataset, an empty
    experiments folder and an `evals.py` script.
    \f

    Args:
        directory: Existing directory in which the `hello_world` folder is
            created. Defaults to the current working directory.

    Raises:
        typer.Exit: With exit code 1 when `directory` is not an existing
            directory.
    """
    # Imported lazily so that other CLI commands do not pay for pandas.
    import pandas as pd
    import os
    import time

    # `isdir` (rather than `exists`) also rejects a path that points at a
    # regular file, which would otherwise fail later inside `makedirs`.
    if not os.path.isdir(directory):
        # `typer.Exit` takes an integer exit code, not a message: report the
        # problem through the shared `error` helper, then exit non-zero.
        error(f"Directory {directory} does not exist.")
        raise typer.Exit(1)

    # Root of the generated example; every artifact lives below it.
    hello_world_path = os.path.join(directory, "hello_world")

    with Live(
        Spinner("dots", text="Creating hello world example...", style="green"),
        console=console,
    ) as live:
        live.update(Spinner("dots", text="Creating directories...", style="green"))
        # Creating the sub-folders also creates `hello_world` itself.
        os.makedirs(os.path.join(hello_world_path, "datasets"), exist_ok=True)
        os.makedirs(os.path.join(hello_world_path, "experiments"), exist_ok=True)
        time.sleep(0.5)  # Brief pause to show spinner

        live.update(Spinner("dots", text="Creating test dataset...", style="green"))
        hello_world_data = [
            {
                "id": 1,
                "query": "What is the capital of France?",
                "expected_output": "Paris",
            },
            {"id": 2, "query": "What is 2 + 2?", "expected_output": "4"},
            {
                "id": 3,
                "query": "What is the largest mammal?",
                "expected_output": "Blue Whale",
            },
            {
                "id": 4,
                "query": "Who developed the theory of relativity?",
                "expected_output": "Einstein",
            },
            {
                "id": 5,
                "query": "What is the programming language used for data science?",
                "expected_output": "Python",
            },
            {
                "id": 6,
                "query": "What is the highest mountain in the world?",
                "expected_output": "Mount Everest",
            },
            {
                "id": 7,
                "query": "Who wrote 'Romeo and Juliet'?",
                "expected_output": "Shakespeare",
            },
            {
                "id": 8,
                "query": "What is the fourth planet from the Sun?",
                "expected_output": "Mars",
            },
            {
                "id": 9,
                "query": "What is the name of the fruit that keeps the doctor away?",
                "expected_output": "Apple",
            },
            {
                "id": 10,
                "query": "Who painted the Mona Lisa?",
                "expected_output": "Leonardo da Vinci",
            },
        ]
        df = pd.DataFrame(hello_world_data)
        df.to_csv(
            os.path.join(hello_world_path, "datasets", "test_data.csv"),
            index=False,
        )
        time.sleep(0.5)  # Brief pause to show spinner

        live.update(
            Spinner("dots", text="Creating evaluation script...", style="green")
        )
        # Source of the generated evals.py. It is written verbatim, so it is
        # kept flush-left: its indentation is the generated file's indentation.
        evals_content = '''import typing as t

import numpy as np
from ragas_experimental import BaseModel, Project
from ragas_experimental.project.backends import LocalCSVProjectBackend
from ragas_experimental.metric.result import MetricResult
from ragas_experimental.metric.numeric import numeric_metric

p = Project(
    project_id="hello_world",
    project_backend=LocalCSVProjectBackend("."),
)


@numeric_metric(name="accuracy_score", range=(0, 1))
def accuracy_score(response: str, expected: str):
    """
    Is the response a good response to the query?
    """
    result = 1 if expected.lower().strip() == response.lower().strip() else 0
    return MetricResult(
        result=result,
        reason=(
            f"Response contains {expected}"
            if result
            else f"Response does not contain {expected}"
        ),
    )


def mock_app_endpoint(**kwargs) -> str:
    """Mock AI endpoint for testing purposes."""
    mock_responses = [
        "Paris","4","Blue Whale","Einstein","Python","Mount Everest","Shakespeare",
        "Mars","Apple","Leonardo da Vinci",]
    return np.random.choice(mock_responses)


class TestDataRow(BaseModel):
    id: t.Optional[int]
    query: str
    expected_output: str


class ExperimentDataRow(TestDataRow):
    response: str
    accuracy: int
    accuracy_reason: t.Optional[str] = None


@p.experiment(ExperimentDataRow)
async def run_experiment(row: TestDataRow):
    response = mock_app_endpoint(query=row.query)
    accuracy = accuracy_score.score(response=response, expected=row.expected_output)

    experiment_view = ExperimentDataRow(
        **row.model_dump(),
        response=response,
        accuracy=accuracy.result,
        accuracy_reason=accuracy.reason,
    )
    return experiment_view
'''

        evals_path = os.path.join(hello_world_path, "evals.py")
        # Explicit encoding so the script is written identically on every
        # platform, regardless of the locale's default encoding.
        with open(evals_path, "w", encoding="utf-8") as f:
            f.write(evals_content)
        time.sleep(0.5)  # Brief pause to show spinner

        live.update(
            Spinner("dots", text="Finalizing hello world example...", style="green")
        )
        time.sleep(0.5)  # Brief pause to show spinner

    success(f"✓ Created hello world example in {hello_world_path}")
    success(
        "✓ You can now run: ragas evals hello_world/evals.py --dataset test_data --metrics accuracy"
    )
616+
617+
442618
if __name__ == "__main__":
443619
app()

0 commit comments

Comments
 (0)