Skip to content

Commit 26edfdb

Browse files
Merge pull request #1307 from liam-hq/move-to-experiment-2
Migrate prompt test CI workflow from Promptfoo to Langfuse
2 parents 917d235 + 634fb53 commit 26edfdb

File tree

12 files changed

+314
-4019
lines changed

12 files changed

+314
-4019
lines changed

.github/workflows/prompt-test.yml

+23-51
Original file line numberDiff line numberDiff line change
@@ -17,57 +17,31 @@ jobs:
1717
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
1818
- uses: ./.github/actions/pnpm-setup
1919

20-
- name: Set up promptfoo cache
21-
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
22-
with:
23-
path: ~/.promptfoo/cache
24-
key: ${{ runner.os }}-promptfoo-v1
25-
restore-keys: |
26-
${{ runner.os }}-promptfoo-
27-
28-
- name: Run promptfoo
20+
- name: Run pnpm test:prompt
2921
env:
30-
OPENAI_API_KEY: ${{ secrets.PROMPTFOO_OPENAI_API_KEY }}
31-
PROMPTFOO_EMAIL: ${{ secrets.PROMPTFOO_EMAIL }}
32-
id: promptfoo
22+
OPENAI_API_KEY: ${{ secrets.PROMPT_TEST_OPENAI_API_KEY }}
23+
LANGFUSE_PUBLIC_KEY: ${{ secrets.PROMPT_TEST_LANGFUSE_PUBLIC_KEY }}
24+
LANGFUSE_SECRET_KEY: ${{ secrets.PROMPT_TEST_LANGFUSE_SECRET_KEY }}
25+
id: pnpm-test-prompt
3326
shell: bash
27+
# TODO: fail when there are errors or failures
28+
# TODO: Consider using turborepo passthrough variables.
3429
run: |
35-
mkdir -p ~/.promptfoo
36-
echo "account:\n email: ${PROMPTFOO_EMAIL}" > ~/.promptfoo/promptfoo.yaml
30+
# see https://turbo.build/docs/crafting-your-repository/using-environment-variables#passthrough-variables
3731
echo "OPENAI_API_KEY=${OPENAI_API_KEY}" > .env.local
32+
echo "LANGFUSE_PUBLIC_KEY=${LANGFUSE_PUBLIC_KEY}" >> .env
33+
echo "LANGFUSE_SECRET_KEY=${LANGFUSE_SECRET_KEY}" >> .env
34+
35+
# TODO: fail when there are errors or failures
3836
pnpm test:prompt
39-
rm -f .env.local
40-
# check
41-
ls -alF ~/.promptfoo
42-
failures_count=$(cat frontend/packages/prompt-test/results.json | jq .results.stats.failures)
43-
errors_count=$(cat frontend/packages/prompt-test/results.json | jq .results.stats.errors)
44-
successes_count=$(cat frontend/packages/prompt-test/results.json | jq .results.stats.successes)
45-
if [ $failures_count -eq 0 ]; then
46-
if [ $errors_count -eq 0 ]; then
47-
output="✅️ Promptfoo test succeeded"
48-
exit_code=0
49-
else
50-
output="❌️ Promptfoo test failed"
51-
exit_code=1
52-
fi
53-
else
54-
output="❌️ Promptfoo test failed"
55-
exit_code=1
56-
fi
57-
echo "output=$output" >> $GITHUB_OUTPUT
58-
echo "successes_count=$successes_count" >> $GITHUB_OUTPUT
59-
echo "failures_count=$failures_count" >> $GITHUB_OUTPUT
60-
echo "errors_count=$errors_count" >> $GITHUB_OUTPUT
61-
# NOTE: if you want to treat failures as a workflow error, uncomment the following line.
62-
# exit $exit_code
6337
64-
- name: Share result
65-
id: share-result
66-
if: always()
67-
shell: bash
68-
run: |
69-
output=$(pnpm dlx promptfoo@0.107.7 share --yes | tail -1)
70-
echo "output=$output" >> $GITHUB_OUTPUT
38+
rm -f .env .env.local
39+
40+
url=$(cat frontend/packages/prompt-test/result.json | jq -r .url)
41+
echo "url=$url" >> $GITHUB_OUTPUT
42+
43+
datasetRunItemsLength=$(cat frontend/packages/prompt-test/result.json | jq -r .datasetRunItemsLength)
44+
echo "datasetRunItemsLength=$datasetRunItemsLength" >> $GITHUB_OUTPUT
7145
7246
- name: Post result
7347
if: always()
@@ -76,10 +50,8 @@ jobs:
7650
message: |
7751
frontend/packages/prompt-test result:
7852
79-
${{steps.share-result.outputs.output}}
80-
81-
${{steps.promptfoo.outputs.output}}
53+
visit: ${{steps.pnpm-test-prompt.outputs.url}}
8254
83-
| ✅️ Successes | ❌️ Failures | ⚠️ Errors |
84-
| --- | --- | --- |
85-
| ${{steps.promptfoo.outputs.successes_count}} | ${{steps.promptfoo.outputs.failures_count}} | ${{steps.promptfoo.outputs.errors_count}} |
55+
| run items length |
56+
| --- |
57+
| ${{steps.pnpm-test-prompt.outputs.datasetRunItemsLength}} |

frontend/packages/jobs/src/prompts/generateReview/generateReview.ts

+13-10
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,19 @@ File Changes:
9898

9999
export const reviewJsonSchema: JSONSchema7 = toJsonSchema(reviewSchema)
100100

101+
const model = new ChatOpenAI({
102+
model: 'o3-mini-2025-01-31',
103+
})
104+
105+
const chatPrompt = ChatPromptTemplate.fromMessages([
106+
['system', SYSTEM_PROMPT],
107+
['human', USER_PROMPT],
108+
])
109+
110+
export const chain = chatPrompt.pipe(
111+
model.withStructuredOutput(reviewJsonSchema),
112+
)
113+
101114
export const generateReview = async (
102115
docsContent: string,
103116
schemaFile: GenerateReviewPayload['schemaFile'],
@@ -107,16 +120,6 @@ export const generateReview = async (
107120
callbacks: Callbacks,
108121
runId: string,
109122
) => {
110-
const chatPrompt = ChatPromptTemplate.fromMessages([
111-
['system', SYSTEM_PROMPT],
112-
['human', USER_PROMPT],
113-
])
114-
115-
const model = new ChatOpenAI({
116-
model: 'o3-mini-2025-01-31',
117-
})
118-
119-
const chain = chatPrompt.pipe(model.withStructuredOutput(reviewJsonSchema))
120123
const response = await chain.invoke(
121124
{
122125
docsContent,
Original file line numberDiff line numberDiff line change
@@ -1 +1,6 @@
1-
export { SYSTEM_PROMPT, USER_PROMPT, reviewJsonSchema } from './generateReview'
1+
export {
2+
SYSTEM_PROMPT,
3+
USER_PROMPT,
4+
reviewJsonSchema,
5+
chain,
6+
} from './generateReview'

frontend/packages/prompt-test/.env

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../../.env
+1-1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
.env.local
2-
results.json
2+
result.json
+42
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# @liam-hq/prompt-test
2+
3+
## What is this?
4+
5+
Testing for `@liam-hq/jobs` prompts.
6+
7+
## Features
8+
9+
- Runtime environments
10+
- [x] CI (GitHub Actions)
11+
- [x] Local development
12+
- Langfuse integration
13+
- [x] `Tracing`
14+
- [x] `Dataset`, `Dataset Items`
15+
- Automatically syncs YAML fixtures from `src/fixtures/` to Langfuse dataset items
16+
- [x] `Evaluation` and `Templates`
17+
- Supported `@liam-hq/jobs` prompts
18+
- [x] generateReview
19+
- [ ] generateDocsSuggestion
20+
- [ ] generateSchemaMeta
21+
22+
## Usage
23+
24+
Run the test suite:
25+
26+
```bash
27+
pnpm --filter=@liam-hq/prompt-test test
28+
```
29+
30+
## Test Structure
31+
32+
Tests are defined in YAML fixtures located in `src/fixtures/`. Each test fixture contains:
33+
- Test name
34+
- Input variables
35+
- Assertions for validation
36+
37+
## Development
38+
39+
To add new test cases:
40+
1. Create a new YAML fixture in `src/fixtures/`
41+
2. Define the test inputs and assertions
42+
3. Run the test suite to validate the changes

frontend/packages/prompt-test/package.json

+4-2
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,11 @@
99
"@liam-hq/configs": "workspace:*",
1010
"@liam-hq/jobs": "workspace:*",
1111
"dotenv": "16.4.7",
12-
"promptfoo": "0.107.7",
12+
"langfuse": "3.37.1",
13+
"langfuse-langchain": "3.37.1",
1314
"tsx": "4.19.3",
14-
"typescript": "5.8.3"
15+
"typescript": "5.8.3",
16+
"yaml": "2.7.1"
1517
},
1618
"scripts": {
1719
"fmt": "concurrently \"pnpm:fmt:*\"",

frontend/packages/prompt-test/src/fixtures/github.com/liam-hq/liam/pull/1033/fixture.yaml

+6-160
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
---
2+
name: 'github.com/liam-hq/liam/pull/1033'
3+
24
assert:
35
- type: llm-rubric
46
value: The report mentions about ensuring that this table is empty before the migration
5-
- type: is-json
67
- type: javascript
78
# `feedbacks` Test 1: Check if the snippet has the exact correct file path
89
value: |
@@ -19,169 +20,14 @@ assert:
1920
# `bodyMarkdown` Test 2: Check that the bodyMarkdown word count (approximated by splitting on spaces) is at most 80
2021
value: |
2122
output.bodyMarkdown.split(" ").length <= 80
22-
- type: cost
23-
threshold: 0.008
2423
2524
vars:
26-
'# ref': github.com/liam-hq/liam/pull/1033
25+
prDescription: ''
26+
prComments: ''
2727
docsContent: ''
28-
schemaFiles:
29-
- filename: frontend/packages/db/prisma/schema.prisma
28+
schemaFile:
29+
filename: frontend/packages/db/schema/schema.sql
3030
content: |
31-
datasource db {
32-
provider = "postgresql"
33-
// NOTE: Use the non-pooling URL to avoid PrismaClientUnknownRequestError for now
34-
url = env("POSTGRES_URL_NON_POOLING")
35-
}
36-
37-
generator client {
38-
provider = "prisma-client-js"
39-
binaryTargets = ["native", "rhel-openssl-3.0.x", "debian-openssl-3.0.x"]
40-
}
41-
42-
model Project {
43-
id Int @id @default(autoincrement())
44-
name String
45-
createdAt DateTime @default(now())
46-
updatedAt DateTime @updatedAt
47-
reviews OverallReview[]
48-
repositoryMappings ProjectRepositoryMapping[]
49-
watchSchemaFilePatterns WatchSchemaFilePattern[]
50-
docs Doc[]
51-
knowledgeSuggestions KnowledgeSuggestion[]
52-
githubDocFilePaths GitHubDocFilePath[]
53-
}
54-
55-
model Repository {
56-
id Int @id @default(autoincrement())
57-
name String
58-
owner String
59-
installationId BigInt
60-
isActive Boolean @default(true)
61-
createdAt DateTime @default(now())
62-
updatedAt DateTime @updatedAt
63-
64-
pullRequests PullRequest[]
65-
projectMappings ProjectRepositoryMapping[]
66-
67-
@@unique([owner, name])
68-
}
69-
70-
model PullRequest {
71-
id Int @id @default(autoincrement())
72-
pullNumber BigInt
73-
commentId BigInt?
74-
createdAt DateTime @default(now())
75-
updatedAt DateTime @updatedAt
76-
migration Migration?
77-
repositoryId Int
78-
repository Repository @relation(fields: [repositoryId], references: [id])
79-
reviews OverallReview[]
80-
81-
@@unique([repositoryId, pullNumber])
82-
}
83-
84-
model Migration {
85-
id Int @id @default(autoincrement())
86-
title String
87-
pullRequestId Int @unique
88-
pullRequest PullRequest @relation(fields: [pullRequestId], references: [id])
89-
createdAt DateTime @default(now())
90-
updatedAt DateTime @updatedAt
91-
}
92-
93-
model OverallReview {
94-
id Int @id @default(autoincrement())
95-
projectId Int?
96-
project Project? @relation(fields: [projectId], references: [id])
97-
pullRequestId Int
98-
pullRequest PullRequest @relation(fields: [pullRequestId], references: [id])
99-
branchName String
100-
reviewComment String?
101-
reviewedAt DateTime @default(now())
102-
createdAt DateTime @default(now())
103-
updatedAt DateTime @updatedAt
104-
}
105-
106-
model ProjectRepositoryMapping {
107-
id Int @id @default(autoincrement())
108-
projectId Int
109-
project Project @relation(fields: [projectId], references: [id])
110-
repositoryId Int
111-
repository Repository @relation(fields: [repositoryId], references: [id])
112-
createdAt DateTime @default(now())
113-
updatedAt DateTime @updatedAt
114-
115-
@@unique([projectId, repositoryId])
116-
}
117-
118-
model WatchSchemaFilePattern {
119-
id Int @id @default(autoincrement())
120-
pattern String
121-
projectId Int
122-
project Project @relation(fields: [projectId], references: [id])
123-
createdAt DateTime @default(now())
124-
updatedAt DateTime @updatedAt
125-
}
126-
127-
model Doc {
128-
id Int @id @default(autoincrement())
129-
title String
130-
content String
131-
latestVersionId Int?
132-
projectId Int
133-
project Project @relation(fields: [projectId], references: [id])
134-
createdAt DateTime @default(now())
135-
updatedAt DateTime @updatedAt
136-
137-
versions DocVersion[]
138-
}
139-
140-
model DocVersion {
141-
id Int @id @default(autoincrement())
142-
docId Int
143-
doc Doc @relation(fields: [docId], references: [id])
144-
version Int
145-
title String
146-
content String
147-
createdAt DateTime @default(now())
148-
149-
@@unique([docId, version])
150-
}
151-
152-
enum KnowledgeType {
153-
SCHEMA
154-
DOCS
155-
}
156-
157-
model KnowledgeSuggestion {
158-
id Int @id @default(autoincrement())
159-
type KnowledgeType // Either Schema or Docs
160-
title String // Used as commit message
161-
path String // Target file path
162-
content String // Full content of the new file
163-
fileSha String // SHA of the file to be updated
164-
branchName String // Branch name for GitHub operations
165-
projectId Int
166-
project Project @relation(fields: [projectId], references: [id])
167-
approvedAt DateTime? // Approval timestamp (null if not approved)
168-
createdAt DateTime @default(now())
169-
updatedAt DateTime @updatedAt
170-
}
171-
172-
model GitHubDocFilePath {
173-
id Int @id @default(autoincrement())
174-
path String // File path in GitHub repository
175-
isReviewEnabled Boolean @default(true) // Whether ReviewAgent should read this file
176-
projectId Int
177-
project Project @relation(fields: [projectId], references: [id])
178-
createdAt DateTime @default(now())
179-
updatedAt DateTime @updatedAt
180-
181-
@@unique([path, projectId]) // Composite unique key to ensure no duplicate paths within a project
182-
}
183-
- filename: frontend/packages/db/schema/schema.sql
184-
content: |+
18531
--
18632
-- PostgreSQL database dump
18733
--

0 commit comments

Comments
 (0)