13
13
from rich .table import Table
14
14
from rich .text import Text
15
15
from rich .panel import Panel
16
+ from rich .spinner import Spinner
17
+ from rich .live import Live
16
18
from .project .core import Project
17
19
from .utils import console
18
20
@@ -265,6 +267,7 @@ async def run_experiments(
265
267
input_data_class : type ,
266
268
baseline_name : Optional [str ] = None ,
267
269
metrics : str = None ,
270
+ name : Optional [str ] = None ,
268
271
):
269
272
"""Run experiments using ragas dataset system."""
270
273
console .print (f"Getting dataset: { dataset_name } " )
@@ -280,7 +283,7 @@ async def run_experiments(
280
283
281
284
# Run the experiment using the run_async method
282
285
try :
283
- experiment_result = await experiment_func .run_async (dataset )
286
+ experiment_result = await experiment_func .run_async (dataset , name = name )
284
287
success ("✓ Completed experiments successfully" )
285
288
except Exception as e :
286
289
error (f"Error running experiments: { e } " )
@@ -373,6 +376,9 @@ def evals(
373
376
baseline : Optional [str ] = typer .Option (
374
377
None , "--baseline" , help = "Baseline experiment name to compare against"
375
378
),
379
+ name : Optional [str ] = typer .Option (
380
+ None , "--name" , help = "Name of the experiment run"
381
+ ),
376
382
):
377
383
"""Run evaluations on a dataset."""
378
384
console .print (f"Running evaluation: { eval_file } " )
@@ -428,7 +434,13 @@ def evals(
428
434
# Run the experiments
429
435
asyncio .run (
430
436
run_experiments (
431
- project , experiment_func , dataset , input_data_class , baseline , metrics
437
+ project ,
438
+ experiment_func ,
439
+ dataset ,
440
+ input_data_class ,
441
+ baseline ,
442
+ metrics ,
443
+ name ,
432
444
)
433
445
)
434
446
success ("✓ Evaluation completed successfully" )
@@ -439,5 +451,169 @@ def evals(
439
451
raise typer .Exit (1 )
440
452
441
453
454
@app.command()
def hello_world(
    directory: Optional[str] = typer.Argument(
        ".", help="Directory to run the hello world example in"
    ),
):
    """Create a runnable 'hello world' example project.

    Scaffolds ``<directory>/hello_world`` containing a ``datasets/`` folder
    with a ten-row question/answer CSV, an empty ``experiments/`` folder,
    and an ``evals.py`` script wired to a mock endpoint, then prints the
    command to run it.

    Exits with code 1 if ``directory`` does not exist.
    """
    import os
    import time

    import pandas as pd

    if not os.path.exists(directory):
        # typer.Exit() takes an integer exit *code*, not a message — print
        # the error first, then exit non-zero (matches the other commands).
        error(f"Directory {directory} does not exist.")
        raise typer.Exit(1)

    hello_world_path = os.path.join(directory, "hello_world")

    with Live(
        Spinner("dots", text="Creating hello world example...", style="green"),
        console=console,
    ) as live:
        live.update(Spinner("dots", text="Creating directories...", style="green"))
        # makedirs with the nested paths also creates hello_world_path itself,
        # so no separate mkdir of the parent is needed.
        os.makedirs(os.path.join(hello_world_path, "datasets"), exist_ok=True)
        os.makedirs(os.path.join(hello_world_path, "experiments"), exist_ok=True)
        time.sleep(0.5)  # Brief pause to show spinner

        live.update(Spinner("dots", text="Creating test dataset...", style="green"))
        hello_world_data = [
            {
                "id": 1,
                "query": "What is the capital of France?",
                "expected_output": "Paris",
            },
            {"id": 2, "query": "What is 2 + 2?", "expected_output": "4"},
            {
                "id": 3,
                "query": "What is the largest mammal?",
                "expected_output": "Blue Whale",
            },
            {
                "id": 4,
                "query": "Who developed the theory of relativity?",
                "expected_output": "Einstein",
            },
            {
                "id": 5,
                "query": "What is the programming language used for data science?",
                "expected_output": "Python",
            },
            {
                "id": 6,
                "query": "What is the highest mountain in the world?",
                "expected_output": "Mount Everest",
            },
            {
                "id": 7,
                "query": "Who wrote 'Romeo and Juliet'?",
                "expected_output": "Shakespeare",
            },
            {
                "id": 8,
                "query": "What is the fourth planet from the Sun?",
                "expected_output": "Mars",
            },
            {
                "id": 9,
                "query": "What is the name of the fruit that keeps the doctor away?",
                "expected_output": "Apple",
            },
            {
                "id": 10,
                "query": "Who painted the Mona Lisa?",
                "expected_output": "Leonardo da Vinci",
            },
        ]
        df = pd.DataFrame(hello_world_data)
        df.to_csv(
            os.path.join(hello_world_path, "datasets", "test_data.csv"),
            index=False,
        )
        time.sleep(0.5)  # Brief pause to show spinner

        live.update(
            Spinner("dots", text="Creating evaluation script...", style="green")
        )
        # Self-contained evaluation script: a mock endpoint plus an exact-match
        # accuracy metric, runnable via `ragas evals`.
        evals_content = '''import typing as t

import numpy as np
from ragas_experimental import BaseModel, Project
from ragas_experimental.project.backends import LocalCSVProjectBackend
from ragas_experimental.metric.result import MetricResult
from ragas_experimental.metric.numeric import numeric_metric

p = Project(
    project_id="hello_world",
    project_backend=LocalCSVProjectBackend("."),
)


@numeric_metric(name="accuracy_score", range=(0, 1))
def accuracy_score(response: str, expected: str):
    """
    Is the response a good response to the query?
    """
    result = 1 if expected.lower().strip() == response.lower().strip() else 0
    return MetricResult(
        result=result,
        reason=(
            f"Response contains {expected}"
            if result
            else f"Response does not contain {expected}"
        ),
    )


def mock_app_endpoint(**kwargs) -> str:
    """Mock AI endpoint for testing purposes."""
    mock_responses = [
        "Paris","4","Blue Whale","Einstein","Python","Mount Everest","Shakespeare",
        "Mars","Apple","Leonardo da Vinci",]
    return np.random.choice(mock_responses)


class TestDataRow(BaseModel):
    id: t.Optional[int]
    query: str
    expected_output: str


class ExperimentDataRow(TestDataRow):
    response: str
    accuracy: int
    accuracy_reason: t.Optional[str] = None


@p.experiment(ExperimentDataRow)
async def run_experiment(row: TestDataRow):
    response = mock_app_endpoint(query=row.query)
    accuracy = accuracy_score.score(response=response, expected=row.expected_output)

    experiment_view = ExperimentDataRow(
        **row.model_dump(),
        response=response,
        accuracy=accuracy.result,
        accuracy_reason=accuracy.reason,
    )
    return experiment_view
'''

        evals_path = os.path.join(hello_world_path, "evals.py")
        with open(evals_path, "w") as f:
            f.write(evals_content)
        time.sleep(0.5)  # Brief pause to show spinner

        live.update(Spinner("dots", text="Finalizing hello world example..."))
        time.sleep(0.5)  # Brief pause to show spinner

    success(f"✓ Created hello world example in {hello_world_path}")
    success(
        "✓ You can now run: ragas evals hello_world/evals.py --dataset test_data --metrics accuracy"
    )
617
+
442
618
# Script entry point: invoke the Typer CLI application when this module
# is executed directly rather than imported.
if __name__ == "__main__":
    app()
0 commit comments