|
41 | 41 | }, |
42 | 42 | { |
43 | 43 | "cell_type": "code", |
44 | | - "execution_count": 2, |
| 44 | + "execution_count": null, |
45 | 45 | "id": "1f1bb86c", |
46 | 46 | "metadata": {}, |
47 | 47 | "outputs": [], |
|
171 | 171 | }, |
172 | 172 | { |
173 | 173 | "cell_type": "code", |
174 | | - "execution_count": 6, |
175 | | - "id": "5e225998", |
| 174 | + "execution_count": 8, |
| 175 | + "id": "58168d30", |
176 | 176 | "metadata": {}, |
177 | 177 | "outputs": [], |
| 178 | + "source": [ |
| 179 | + "def clean_enum_repr(s):\n", |
| 180 | + " # Regex breakdown:\n", |
| 181 | + " # < : matches the opening bracket\n", |
| 182 | + "    # ([^:]+) : Capture Group 1: the enum member's dotted name (everything before the colon)\n", |
| 183 | + " # : : matches the colon\n", |
| 184 | + " # [^>]+ : matches the value and anything else until the closing bracket\n", |
| 185 | + " # > : matches the closing bracket\n", |
| 186 | + " return re.sub(r'<([^:]+):[^>]+>', r'\\1', s)" |
| 187 | + ] |
| 188 | + }, |
| 189 | + { |
| 190 | + "cell_type": "code", |
| 191 | + "execution_count": 11, |
| 192 | + "id": "5e225998", |
| 193 | + "metadata": {}, |
| 194 | + "outputs": [ |
| 195 | + { |
| 196 | + "name": "stdout", |
| 197 | + "output_type": "stream", |
| 198 | + "text": [ |
| 199 | + "Loaded 11 metrics:\n", |
| 200 | + "- backups_per_student\n", |
| 201 | + "- total_time_spent_days\n", |
| 202 | + "- problems_solved_by_last_backup\n", |
| 203 | + "- backups_per_minute\n", |
| 204 | + "- time_between_backups\n", |
| 205 | + "- net_num_lines_added\n", |
| 206 | + "- num_occurrences_print\n", |
| 207 | + "- backups_with_print\n", |
| 208 | + "- backups_per_problem\n", |
| 209 | + "- worksessions_per_student\n", |
| 210 | + "- worksession_length_per_student\n" |
| 211 | + ] |
| 212 | + } |
| 213 | + ], |
178 | 214 | "source": [ |
179 | 215 | "load_results = input(\"Would you like to load the existing results.json file? Y/N: \")\n", |
180 | 216 | "if load_results.strip().upper() == \"Y\":\n", |
181 | 217 | " if os.path.exists(\"results.json\"):\n", |
182 | 218 | " with open(\"results.json\") as f:\n", |
183 | | - " results = json.load(f)\n", |
184 | | - " print(f\"Loaded metrics from results.json: {list(results.keys())}\")\n", |
| 219 | + " raw_results = json.load(f)\n", |
| 220 | + "\n", |
| 221 | + " deserialized_results = {}\n", |
| 222 | + "\n", |
| 223 | + " for metric, courses in raw_results.items():\n", |
| 224 | + " deserialized_results[metric] = {}\n", |
| 225 | + "\n", |
| 226 | + " for course_str, data in courses.items():\n", |
| 227 | + " # Remove extra characters from enum repr so that eval(...) works\n", |
| 228 | + " course_str = clean_enum_repr(course_str)\n", |
| 229 | + "\n", |
| 230 | + " # Deserialize course string to Course object\n", |
| 231 | + " course_obj = eval(course_str)\n", |
| 232 | + " assert isinstance(course_obj, Course)\n", |
| 233 | + "\n", |
| 234 | + " # Deserialize the data\n", |
| 235 | + " if isinstance(data, dict) and data.get(\"__df\"):\n", |
| 236 | + " # Remove the flag before creating the DataFrame\n", |
| 237 | + " data.pop(\"__df\")\n", |
| 238 | + " processed_data = pd.DataFrame.from_dict(data)\n", |
| 239 | + " elif isinstance(data, list):\n", |
| 240 | + " processed_data = np.array(data)\n", |
| 241 | + " else:\n", |
| 242 | + " processed_data = data\n", |
| 243 | + "\n", |
| 244 | + " deserialized_results[metric][course_obj] = processed_data\n", |
| 245 | + "\n", |
| 246 | + " results = deserialized_results\n", |
| 247 | + "\n", |
| 248 | + " print(f\"Loaded {len(results)} metrics:\")\n", |
| 249 | + " for metric in results.keys():\n", |
| 250 | + " print(f\"- {metric}\")\n", |
185 | 251 | " else:\n", |
186 | 252 | " print(\"results.json not found, defaulting to empty dict\")\n", |
187 | 253 | " results = {}\n", |
|
191 | 257 | }, |
192 | 258 | { |
193 | 259 | "cell_type": "code", |
194 | | - "execution_count": 7, |
| 260 | + "execution_count": 12, |
195 | 261 | "id": "2422667d", |
196 | 262 | "metadata": {}, |
197 | 263 | "outputs": [], |
|
267 | 333 | }, |
268 | 334 | { |
269 | 335 | "cell_type": "code", |
270 | | - "execution_count": 11, |
| 336 | + "execution_count": 13, |
271 | 337 | "id": "f9bf7d88", |
272 | 338 | "metadata": {}, |
273 | 339 | "outputs": [ |
|
277 | 343 | "<Axes: ylabel='Count'>" |
278 | 344 | ] |
279 | 345 | }, |
280 | | - "execution_count": 11, |
| 346 | + "execution_count": 13, |
281 | 347 | "metadata": {}, |
282 | 348 | "output_type": "execute_result" |
283 | 349 | }, |
|
3718 | 3784 | "outputs": [], |
3719 | 3785 | "source": [ |
3720 | 3786 | "with open(\"results.json\", \"w\") as f:\n", |
3721 | | - " json.dump(results, f)" |
| 3787 | + " serialized_results = {}\n", |
| 3788 | + "\n", |
| 3789 | + " for metric, courses in results.items():\n", |
| 3790 | + "        # Create this metric's nested dict first so the per-course assignments below don't raise KeyError\n", |
| 3791 | + " serialized_results[metric] = {}\n", |
| 3792 | + "\n", |
| 3793 | + " for course, data in courses.items():\n", |
| 3794 | + "            # JSON object keys must be strings, so serialize the Course key as its repr (decoded back on load)\n", |
| 3795 | + " course_key = repr(course)\n", |
| 3796 | + "\n", |
| 3797 | + " if isinstance(data, pd.DataFrame):\n", |
| 3798 | + " # .to_json() returns a string; we parse it to a dict to modify it\n", |
| 3799 | + " df_dict = json.loads(data.to_json())\n", |
| 3800 | + " df_dict[\"__df\"] = True\n", |
| 3801 | + " serialized_results[metric][course_key] = df_dict\n", |
| 3802 | + " elif isinstance(data, np.ndarray):\n", |
| 3803 | + "                # NumPy arrays aren't JSON serializable by default; tolist() converts them (NaN entries would still emit non-standard JSON)\n", |
| 3804 | + " serialized_results[metric][course_key] = data.tolist()\n", |
| 3805 | + " else:\n", |
| 3806 | + " serialized_results[metric][course_key] = data\n", |
| 3807 | + "\n", |
| 3808 | + " json.dump(serialized_results, f, indent=2)" |
3722 | 3809 | ] |
3723 | 3810 | }, |
3724 | 3811 | { |
|
0 commit comments