Skip to content

Commit b52d966

Browse files
committed
Support serializing/deserializing results.json so that we don't have to recompute metrics to redo figures
1 parent 6cb6a24 commit b52d966

1 file changed

Lines changed: 96 additions & 9 deletions

File tree

src/notebooks/technical_report.ipynb

Lines changed: 96 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
},
4242
{
4343
"cell_type": "code",
44-
"execution_count": 2,
44+
"execution_count": null,
4545
"id": "1f1bb86c",
4646
"metadata": {},
4747
"outputs": [],
@@ -171,17 +171,83 @@
171171
},
172172
{
173173
"cell_type": "code",
174-
"execution_count": 6,
175-
"id": "5e225998",
174+
"execution_count": 8,
175+
"id": "58168d30",
176176
"metadata": {},
177177
"outputs": [],
178+
"source": [
179+
"def clean_enum_repr(s):\n",
180+
" # Regex breakdown:\n",
181+
" # < : matches the opening bracket\n",
182+
" # ([^:]+) : Capture Group 1: matches everything until the colon (the name)\n",
183+
" # : : matches the colon\n",
184+
" # [^>]+ : matches the value and anything else until the closing bracket\n",
185+
" # > : matches the closing bracket\n",
186+
" return re.sub(r'<([^:]+):[^>]+>', r'\\1', s)"
187+
]
188+
},
189+
{
190+
"cell_type": "code",
191+
"execution_count": 11,
192+
"id": "5e225998",
193+
"metadata": {},
194+
"outputs": [
195+
{
196+
"name": "stdout",
197+
"output_type": "stream",
198+
"text": [
199+
"Loaded 11 metrics:\n",
200+
"- backups_per_student\n",
201+
"- total_time_spent_days\n",
202+
"- problems_solved_by_last_backup\n",
203+
"- backups_per_minute\n",
204+
"- time_between_backups\n",
205+
"- net_num_lines_added\n",
206+
"- num_occurrences_print\n",
207+
"- backups_with_print\n",
208+
"- backups_per_problem\n",
209+
"- worksessions_per_student\n",
210+
"- worksession_length_per_student\n"
211+
]
212+
}
213+
],
178214
"source": [
179215
"load_results = input(\"Would you like to load the existing results.json file? Y/N: \")\n",
180216
"if load_results.strip().upper() == \"Y\":\n",
181217
" if os.path.exists(\"results.json\"):\n",
182218
" with open(\"results.json\") as f:\n",
183-
" results = json.load(f)\n",
184-
" print(f\"Loaded metrics from results.json: {list(results.keys())}\")\n",
219+
" raw_results = json.load(f)\n",
220+
"\n",
221+
" deserialized_results = {}\n",
222+
"\n",
223+
" for metric, courses in raw_results.items():\n",
224+
" deserialized_results[metric] = {}\n",
225+
"\n",
226+
" for course_str, data in courses.items():\n",
227+
" # Remove extra characters from enum repr so that eval(...) works\n",
228+
" course_str = clean_enum_repr(course_str)\n",
229+
"\n",
230+
" # Deserialize course string to Course object\n",
231+
" course_obj = eval(course_str)\n",
232+
" assert isinstance(course_obj, Course)\n",
233+
"\n",
234+
" # Deserialize the data\n",
235+
" if isinstance(data, dict) and data.get(\"__df\"):\n",
236+
" # Remove the flag before creating the DataFrame\n",
237+
" data.pop(\"__df\")\n",
238+
" processed_data = pd.DataFrame.from_dict(data)\n",
239+
" elif isinstance(data, list):\n",
240+
" processed_data = np.array(data)\n",
241+
" else:\n",
242+
" processed_data = data\n",
243+
"\n",
244+
" deserialized_results[metric][course_obj] = processed_data\n",
245+
"\n",
246+
" results = deserialized_results\n",
247+
"\n",
248+
" print(f\"Loaded {len(results)} metrics:\")\n",
249+
" for metric in results.keys():\n",
250+
" print(f\"- {metric}\")\n",
185251
" else:\n",
186252
" print(\"results.json not found, defaulting to empty dict\")\n",
187253
" results = {}\n",
@@ -191,7 +257,7 @@
191257
},
192258
{
193259
"cell_type": "code",
194-
"execution_count": 7,
260+
"execution_count": 12,
195261
"id": "2422667d",
196262
"metadata": {},
197263
"outputs": [],
@@ -267,7 +333,7 @@
267333
},
268334
{
269335
"cell_type": "code",
270-
"execution_count": 11,
336+
"execution_count": 13,
271337
"id": "f9bf7d88",
272338
"metadata": {},
273339
"outputs": [
@@ -277,7 +343,7 @@
277343
"<Axes: ylabel='Count'>"
278344
]
279345
},
280-
"execution_count": 11,
346+
"execution_count": 13,
281347
"metadata": {},
282348
"output_type": "execute_result"
283349
},
@@ -3718,7 +3784,28 @@
37183784
"outputs": [],
37193785
"source": [
37203786
"with open(\"results.json\", \"w\") as f:\n",
3721-
" json.dump(results, f)"
3787+
" serialized_results = {}\n",
3788+
"\n",
3789+
" for metric, courses in results.items():\n",
3790+
" # Initialize the metric dictionary so we don't get a KeyError\n",
3791+
" serialized_results[metric] = {}\n",
3792+
"\n",
3793+
" for course, data in courses.items():\n",
3794+
" # Convert courses into repr strings for deserialization\n",
3795+
" course_key = repr(course)\n",
3796+
"\n",
3797+
" if isinstance(data, pd.DataFrame):\n",
3798+
" # .to_json() returns a string; we parse it to a dict to modify it\n",
3799+
" df_dict = json.loads(data.to_json())\n",
3800+
" df_dict[\"__df\"] = True\n",
3801+
" serialized_results[metric][course_key] = df_dict\n",
3802+
" elif isinstance(data, np.ndarray):\n",
3803+
" # NumPy arrays aren't JSON serializable by default\n",
3804+
" serialized_results[metric][course_key] = data.tolist()\n",
3805+
" else:\n",
3806+
" serialized_results[metric][course_key] = data\n",
3807+
"\n",
3808+
" json.dump(serialized_results, f, indent=2)"
37223809
]
37233810
},
37243811
{

0 commit comments

Comments
 (0)