Skip to content
This repository was archived by the owner on Oct 4, 2021. It is now read-only.

Commit 82eee16

Browse files
committed
feat: v0 rapport post integration
1 parent da73237 commit 82eee16

File tree

1 file changed

+225
-0
lines changed

1 file changed

+225
-0
lines changed
Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,225 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"id": "relevant-fighter",
6+
"metadata": {},
7+
"source": [
8+
"# Automated Post-integration Report - Signaux Faibles\n",
9+
"This notebook can be run after each new data integration by the [opensignauxfaibles](https://github.com/signaux-faibles/opensignauxfaibles) codebase."
10+
]
11+
},
12+
{
13+
"cell_type": "code",
14+
"execution_count": null,
15+
"id": "straight-detroit",
16+
"metadata": {},
17+
"outputs": [],
18+
"source": [
19+
"VARIABLES = [\n",
20+
" \"financier_court_terme\",\n",
21+
" \"interets\",\n",
22+
" \"ca\",\n",
23+
" \"equilibre_financier\",\n",
24+
" \"endettement\",\n",
25+
" \"degre_immo_corporelle\",\n",
26+
" \"liquidite_reduite\",\n",
27+
" \"poids_bfr_exploitation\",\n",
28+
" \"productivite_capital_investi\",\n",
29+
" \"rentabilite_economique\",\n",
30+
" \"rentabilite_nette\",\n",
31+
" \"cotisation\",\n",
32+
" \"cotisation_moy12m\",\n",
33+
" \"montant_part_ouvriere\",\n",
34+
" \"montant_part_ouvriere_past_1\",\n",
35+
" \"montant_part_ouvriere_past_12\",\n",
36+
" \"montant_part_ouvriere_past_2\",\n",
37+
" \"montant_part_ouvriere_past_3\",\n",
38+
" \"montant_part_ouvriere_past_6\",\n",
39+
" \"montant_part_patronale\",\n",
40+
" \"montant_part_patronale_past_1\",\n",
41+
" \"montant_part_patronale_past_12\",\n",
42+
" \"montant_part_patronale_past_2\",\n",
43+
" \"montant_part_patronale_past_3\",\n",
44+
" \"montant_part_patronale_past_6\",\n",
45+
" \"ratio_dette\",\n",
46+
" \"ratio_dette_moy12m\",\n",
47+
" \"effectif\",\n",
48+
" \"apart_heures_consommees_cumulees\",\n",
49+
" \"apart_heures_consommees\",\n",
50+
" \"paydex_nb_jours\",\n",
51+
" \"paydex_nb_jours_past_12\",\n",
52+
"]\n",
53+
"# ces variables sont toujours requêtées\n",
54+
"VARIABLES += [\"outcome\", \"periode\", \"siret\", \"siren\", \"time_til_outcome\", \"code_naf\"]"
55+
]
56+
},
57+
{
58+
"cell_type": "markdown",
59+
"id": "turkish-newport",
60+
"metadata": {},
61+
"source": [
62+
"## Fetch a random sample of the data"
63+
]
64+
},
65+
{
66+
"cell_type": "code",
67+
"execution_count": null,
68+
"id": "reported-peoples",
69+
"metadata": {},
70+
"outputs": [],
71+
"source": [
72+
"%config Completer.use_jedi = False\n",
73+
"import pandas as pd"
74+
]
75+
},
76+
{
77+
"cell_type": "code",
78+
"execution_count": null,
79+
"id": "married-drinking",
80+
"metadata": {},
81+
"outputs": [],
82+
"source": [
83+
"from predictsignauxfaibles.data import SFDataset"
84+
]
85+
},
86+
{
87+
"cell_type": "code",
88+
"execution_count": null,
89+
"id": "authentic-rendering",
90+
"metadata": {},
91+
"outputs": [],
92+
"source": [
93+
"dataset = SFDataset(\n",
94+
" fields = VARIABLES,\n",
95+
" sample_size=10_000\n",
96+
")\n",
97+
"dataset.fetch_data();"
98+
]
99+
},
100+
{
101+
"cell_type": "markdown",
102+
"id": "presidential-acrobat",
103+
"metadata": {},
104+
"source": [
105+
"## Temporal Coverage and NA values"
106+
]
107+
},
108+
{
109+
"cell_type": "code",
110+
"execution_count": null,
111+
"id": "monthly-secretary",
112+
"metadata": {},
113+
"outputs": [],
114+
"source": [
115+
"dataset.data.periode = pd.to_datetime(dataset.data.periode)"
116+
]
117+
},
118+
{
119+
"cell_type": "code",
120+
"execution_count": null,
121+
"id": "otherwise-culture",
122+
"metadata": {},
123+
"outputs": [],
124+
"source": [
125+
"date_range = dataset.data.periode.min().date(), dataset.data.periode.max().date()\n",
126+
"print(f\"Data goes from {date_range[0]} to {date_range[1]}\")"
127+
]
128+
},
129+
{
130+
"cell_type": "code",
131+
"execution_count": null,
132+
"id": "sexual-chester",
133+
"metadata": {},
134+
"outputs": [],
135+
"source": [
136+
"(dataset.data.isna().sum() / len(dataset) * 100).sort_values(ascending = False).to_frame()"
137+
]
138+
},
139+
{
140+
"cell_type": "markdown",
141+
"id": "oriental-flush",
142+
"metadata": {},
143+
"source": [
144+
"## Coverage over time for selected variables"
145+
]
146+
},
147+
{
148+
"cell_type": "code",
149+
"execution_count": null,
150+
"id": "historical-brick",
151+
"metadata": {},
152+
"outputs": [],
153+
"source": [
154+
"import matplotlib.pyplot as plt\n",
155+
"%matplotlib inline"
156+
]
157+
},
158+
{
159+
"cell_type": "code",
160+
"execution_count": null,
161+
"id": "awful-nurse",
162+
"metadata": {},
163+
"outputs": [],
164+
"source": [
165+
"def count_na_prop(series):\n",
166+
" return (1 - series.isna().sum() / len(series)) * 100\n",
167+
"\n",
168+
"\n",
169+
"fig, axs = plt.subplots(len(VARIABLES), figsize=(10, 100))\n",
170+
"fig.tight_layout()\n",
171+
"for i, variable in enumerate(VARIABLES):\n",
172+
" grouped = dataset.data.groupby(pd.Grouper(key=\"periode\", freq=\"M\")).agg({f\"{variable}\": count_na_prop})\n",
173+
" axs[i].set_title(f\"{variable}\")\n",
174+
" axs[i].set_ylim([0, 100])\n",
175+
" axs[i].plot_date(grouped.index, grouped[f\"{variable}\"], \"-\");\n",
176+
" axs[i].set(adjustable='box')"
177+
]
178+
},
179+
{
180+
"cell_type": "code",
181+
"execution_count": null,
182+
"id": "lucky-clerk",
183+
"metadata": {},
184+
"outputs": [],
185+
"source": []
186+
},
187+
{
188+
"cell_type": "code",
189+
"execution_count": null,
190+
"id": "proud-volunteer",
191+
"metadata": {},
192+
"outputs": [],
193+
"source": []
194+
},
195+
{
196+
"cell_type": "code",
197+
"execution_count": null,
198+
"id": "sensitive-pipeline",
199+
"metadata": {},
200+
"outputs": [],
201+
"source": []
202+
}
203+
],
204+
"metadata": {
205+
"kernelspec": {
206+
"display_name": "sf",
207+
"language": "python",
208+
"name": "sf"
209+
},
210+
"language_info": {
211+
"codemirror_mode": {
212+
"name": "ipython",
213+
"version": 3
214+
},
215+
"file_extension": ".py",
216+
"mimetype": "text/x-python",
217+
"name": "python",
218+
"nbconvert_exporter": "python",
219+
"pygments_lexer": "ipython3",
220+
"version": "3.6.8"
221+
}
222+
},
223+
"nbformat": 4,
224+
"nbformat_minor": 5
225+
}

0 commit comments

Comments
 (0)