Skip to content

Commit c8a39c9

Browse files
👷 automate annuaire entreprises data updates via daily cron (#1488)
Replaces manual dependency tracking with automated daily workflow that: - Monitors `annuaire-entreprises-data-gouv-fr/search-infra` for new commits - Fetches updated data files from specific commit hash - Creates PRs automatically when updates are detected **Key improvements:** - Removed `peerDependencies` pollution (now uses `data/.commit` file) - Replaced `degit` with clean TypeScript fetch script - Removed unused dependencies (`degit`, `npm-run-all2`) - Ensures data files stay in sync with tracked commit hash As in #1483, we downgrade the data on purpose to ensure an update PR is created after merge.
1 parent 1a92a48 commit c8a39c9

File tree

6 files changed

+168
-20
lines changed

6 files changed

+168
-20
lines changed
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
# Keeps packages/annuaire_entreprises/data in sync with the upstream
# annuaire-entreprises-data-gouv-fr/search-infra repository.
#
# The workflow compares the upstream HEAD commit with the hash stored in
# packages/annuaire_entreprises/data/.commit. When they differ it rewrites
# .commit, rebuilds the data files and, if any JSON file changed, adds a
# changeset and opens a pull request. On `pull_request` events it runs as a
# dry run: everything executes except the pull request creation.
name: 🏢 Update Annuaire Entreprises

on:
  schedule:
    - cron: "0 2 * * *" # Daily at 2 AM UTC
  pull_request:
    paths:
      - "packages/annuaire_entreprises/scripts/build-data.ts"
      - ".github/workflows/update-annuaire.yml"
  workflow_dispatch:

jobs:
  update:
    name: 🔄 Update data files
    runs-on: ubuntu-latest
    env:
      # Evaluates to the string "true" only when triggered by a pull request.
      DRY_RUN: ${{ github.event_name == 'pull_request' }}
    steps:
      - name: 📥 Checkout repository
        uses: actions/checkout@v5

      - name: ⚙️ Enable corepack
        run: corepack enable

      - name: 📦 Setup Node.js
        uses: actions/setup-node@v5
        with:
          cache: "npm"
          node-version-file: package.json

      - name: 📥 Install dependencies
        run: npm ci

      - name: 🔍 Get latest commit from search-infra
        id: latest
        run: |
          # Query the upstream repository on github.com directly. The output is
          # captured on its own so that an ls-remote failure is not masked by
          # the pipe into cut.
          REMOTE_HEAD=$(git ls-remote https://github.com/annuaire-entreprises-data-gouv-fr/search-infra.git HEAD)
          LATEST_COMMIT=$(echo "$REMOTE_HEAD" | cut -f1)
          # Never propagate an empty hash: it would be written to data/.commit.
          if [ -z "$LATEST_COMMIT" ]; then
            echo "::error::Could not resolve HEAD of search-infra"
            exit 1
          fi
          echo "commit=$LATEST_COMMIT" >> "$GITHUB_OUTPUT"
          echo "short_commit=${LATEST_COMMIT:0:7}" >> "$GITHUB_OUTPUT"

      - name: 📋 Get current commit
        id: current
        run: |
          CURRENT=$(cat packages/annuaire_entreprises/data/.commit)
          echo "commit=$CURRENT" >> "$GITHUB_OUTPUT"
          echo "short_commit=${CURRENT:0:7}" >> "$GITHUB_OUTPUT"

      - name: ✅ Check if update needed
        id: check
        run: |
          if [ "${{ steps.latest.outputs.commit }}" != "${{ steps.current.outputs.commit }}" ]; then
            echo "needs_update=true" >> "$GITHUB_OUTPUT"
          else
            echo "needs_update=false" >> "$GITHUB_OUTPUT"
          fi

      - name: 📦 Update commit reference
        if: steps.check.outputs.needs_update == 'true'
        run: |
          echo "${{ steps.latest.outputs.commit }}" > packages/annuaire_entreprises/data/.commit

      - name: 🔄 Rebuild data files
        if: steps.check.outputs.needs_update == 'true'
        run: npm run build:data
        working-directory: packages/annuaire_entreprises

      # A new upstream commit does not necessarily touch the tracked JSON
      # files; the remaining steps only run when their content changed.
      - name: 🔍 Check for data changes
        if: steps.check.outputs.needs_update == 'true'
        id: data_check
        run: |
          if git diff --quiet packages/annuaire_entreprises/data/*.json; then
            echo "needs_update=false" >> "$GITHUB_OUTPUT"
          else
            echo "needs_update=true" >> "$GITHUB_OUTPUT"
          fi

      - name: 🏷️ Show update summary
        if: steps.data_check.outputs.needs_update == 'true'
        run: |
          echo "📊 Update Summary:"
          echo " Current: ${{ steps.current.outputs.short_commit }}"
          echo " Latest: ${{ steps.latest.outputs.short_commit }}"
          echo " Dry run: ${{ env.DRY_RUN }}"

      - name: 📋 Show data diff
        if: steps.data_check.outputs.needs_update == 'true'
        run: |
          echo "::group::📊 Data changes"
          git diff --stat packages/annuaire_entreprises/data/
          echo ""
          git diff packages/annuaire_entreprises/data/
          echo "::endgroup::"

      - name: 📝 Create changeset
        if: steps.data_check.outputs.needs_update == 'true'
        run: |
          mkdir -p .changeset
          cat << EOF > .changeset/update-annuaire-${{ steps.latest.outputs.short_commit }}.md
          ---
          "@proconnect-gouv/proconnect.annuaire_entreprises": patch
          ---

          ⬆️ Mise à jour des données annuaire entreprises depuis search-infra@${{ steps.latest.outputs.short_commit }}
          EOF

      # Skipped in dry-run mode so pull requests touching this workflow never
      # open a second pull request.
      - name: 🔀 Create Pull Request
        if: steps.data_check.outputs.needs_update == 'true' && env.DRY_RUN != 'true'
        uses: peter-evans/create-pull-request@v7
        with:
          add-paths: |
            .changeset/*.md
            packages/annuaire_entreprises/data/.commit
            packages/annuaire_entreprises/data/*.json
          author: "github-actions[bot] <github-actions[bot]@users.noreply.github.com>"
          branch: github-actions/update-annuaire-${{ steps.latest.outputs.short_commit }}
          commit-message: "⬆️ bump annuaire-entreprises-data-gouv-fr/search-infra@${{ steps.latest.outputs.short_commit }}"
          delete-branch: true
          title: "⬆️ bump annuaire-entreprises-data-gouv-fr/search-infra@${{ steps.latest.outputs.short_commit }}"
          body: |
            <div align=center><img src="https://annuaire-entreprises.data.gouv.fr/images/linkedin.jpg" /></div>

            ---

            Automated update of `@annuaire-entreprises-data-gouv-fr/search-infra` from ${{ steps.current.outputs.short_commit }} to ${{ steps.latest.outputs.short_commit }}.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
3c339c4f4e0367eddb24c827667d7db64623f43d

packages/annuaire_entreprises/data/administration_siren_whitelist.json

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,5 @@
134134
"180020026": "CAISSE DES DEPOTS ET CONSIGNATIONS (CDC)",
135135
"334654035": "MAISONS & CITES SOCIETE ANONYME D'HLM (M & C)",
136136
"264600115": "HOPITAL DE SAINT CERE - SAINT-CERE",
137-
"180000010": "GRANDE CHANCELLERIE DE LA LEGION D'HONNEUR",
138-
"263500126": "CENTRE HOSPITALIER INTERCOMMUNAL REDON-CARENTOIR",
139-
"439988767": "PAVI EVOLUTION (LE MAJESTIC)"
137+
"180000010": "GRANDE CHANCELLERIE DE LA LEGION D'HONNEUR"
140138
}

packages/annuaire_entreprises/package.json

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,9 @@
3131
},
3232
"scripts": {
3333
"build": "tsc --build tsconfig.lib.json",
34-
"prebuild:data": "rm -rf data",
35-
"build:data": "run-p build:degit:*",
34+
"prebuild:data": "npx --yes del-cli data/*.json",
35+
"build:data": "tsx scripts/build-data.ts",
3636
"postbuild:data": "prettier --write data",
37-
"build:degit:administration_nature_juridique": "degit annuaire-entreprises-data-gouv-fr/search-infra/helpers/labels/administration_nature_juridique.json data/administration_nature_juridique.json",
38-
"build:degit:administration_siren_blacklist": "degit annuaire-entreprises-data-gouv-fr/search-infra/helpers/labels/administration_siren_blacklist.json data/administration_siren_blacklist.json",
39-
"build:degit:administration_siren_whitelist": "degit annuaire-entreprises-data-gouv-fr/search-infra/helpers/labels/administration_siren_whitelist.json data/administration_siren_whitelist.json",
4037
"check": "npm run build -- --noEmit",
4138
"dev": "npm run build -- --watch --preserveWatchOutput",
4239
"dev:test": "tsx --watch --test src/**/*.test.ts",
@@ -46,18 +43,8 @@
4643
"devDependencies": {
4744
"@proconnect-gouv/proconnect.devtools.typescript": "1.0.0",
4845
"@types/node": "^22.18.6",
49-
"degit": "^2.8.4",
50-
"npm-run-all2": "^8.0.4",
5146
"tsx": "^4.20.3"
5247
},
53-
"peerDependencies": {
54-
"@annuaire-entreprises-data-gouv-fr/search-infra": "annuaire-entreprises-data-gouv-fr/search-infra#3c339c4f4e0367eddb24c827667d7db64623f43d"
55-
},
56-
"peerDependenciesMeta": {
57-
"@annuaire-entreprises-data-gouv-fr/search-infra": {
58-
"optional": true
59-
}
60-
},
6148
"publishConfig": {
6249
"access": "public",
6350
"provenance": true
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
#!/usr/bin/env tsx

/**
 * Rebuilds the JSON data files of this package.
 *
 * Reads the commit reference stored in `data/.commit`, downloads each label
 * file from `helpers/labels` of the search-infra repository at that commit,
 * and writes it into the `data` directory next to `.commit`.
 *
 * Throws (and therefore exits non-zero) when the commit reference is empty
 * or when any download does not return a successful HTTP status.
 */

import { readFile, writeFile } from "node:fs/promises";
import { join } from "node:path";
import { fileURLToPath } from "node:url";

const __dirname = fileURLToPath(new URL(".", import.meta.url));
const packageRoot = join(__dirname, "..");
const dataDir = join(packageRoot, "data");

// Read commit hash from data/.commit
const commitPath = join(dataDir, ".commit");
const commit = (await readFile(commitPath, "utf-8")).trim();

// An empty reference would silently produce a malformed download URL.
if (commit === "") {
  throw new Error(`No commit reference found in ${commitPath}`);
}

console.log(`📦 Fetching data from search-infra@${commit.slice(0, 7)}`);

const files = [
  "administration_nature_juridique.json",
  "administration_siren_blacklist.json",
  "administration_siren_whitelist.json",
];

const baseUrl = `https://raw.githubusercontent.com/annuaire-entreprises-data-gouv-fr/search-infra/${commit}/helpers/labels`;

for (const file of files) {
  const url = `${baseUrl}/${file}`;
  console.log(`📥 Fetching ${file}...`);

  const response = await fetch(url);
  if (!response.ok) {
    // statusText may be empty, so always report the numeric status and the
    // URL to keep the failure diagnosable.
    throw new Error(
      `Failed to fetch ${file}: ${response.status} ${response.statusText} (${url})`,
    );
  }

  const content = await response.text();
  await writeFile(join(dataDir, file), content, "utf-8");
  console.log(`✅ Wrote ${file}`);
}

console.log("🎉 Data build complete");

packages/annuaire_entreprises/tsconfig.json

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,9 @@
77
"#data/*": ["./data/*"]
88
},
99
"resolveJsonModule": true,
10-
"rootDir": "src",
1110
"types": ["node"]
1211
},
1312
"extends": "@proconnect-gouv/proconnect.devtools.typescript/base/tsconfig.json",
14-
"include": ["src"],
13+
"include": ["scripts", "src"],
1514
"references": []
1615
}

0 commit comments

Comments
 (0)