From 90b8bb6193d36fddfcad2208135bf21f696ec276 Mon Sep 17 00:00:00 2001 From: wuhongsheng <664116298@qq.com> Date: Fri, 8 Nov 2024 13:58:12 +0800 Subject: [PATCH 1/2] fix:JSON serialization problem --- nano_graphrag/_op.py | 4 ++-- nano_graphrag/_storage/gdb_networkx.py | 6 +++--- nano_graphrag/_utils.py | 10 +++++----- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/nano_graphrag/_op.py b/nano_graphrag/_op.py index 4ee9ed5..9714a63 100644 --- a/nano_graphrag/_op.py +++ b/nano_graphrag/_op.py @@ -1,5 +1,5 @@ import re -import json +import json5 import asyncio import tiktoken from typing import Union @@ -680,7 +680,7 @@ async def _find_most_related_community_from_entities( for node_d in node_datas: if "clusters" not in node_d: continue - related_communities.extend(json.loads(node_d["clusters"])) + related_communities.extend(json5.loads(node_d["clusters"])) related_community_dup_keys = [ str(dp["cluster"]) for dp in related_communities diff --git a/nano_graphrag/_storage/gdb_networkx.py b/nano_graphrag/_storage/gdb_networkx.py index e29bf3e..38351d3 100644 --- a/nano_graphrag/_storage/gdb_networkx.py +++ b/nano_graphrag/_storage/gdb_networkx.py @@ -1,5 +1,5 @@ import html -import json +import json5 import os from collections import defaultdict from dataclasses import dataclass @@ -154,7 +154,7 @@ async def community_schema(self) -> dict[str, SingleCommunitySchema]: for node_id, node_data in self._graph.nodes(data=True): if "clusters" not in node_data: continue - clusters = json.loads(node_data["clusters"]) + clusters = json5.loads(node_data["clusters"]) this_node_edges = self._graph.edges(node_id) for cluster in clusters: @@ -195,7 +195,7 @@ async def community_schema(self) -> dict[str, SingleCommunitySchema]: def _cluster_data_to_subgraphs(self, cluster_data: dict[str, list[dict[str, str]]]): for node_id, clusters in cluster_data.items(): - self._graph.nodes[node_id]["clusters"] = json.dumps(clusters) + self._graph.nodes[node_id]["clusters"] = json5.dumps(clusters) async def _leiden_clustering(self): from graspologic.partition import hierarchical_leiden diff --git a/nano_graphrag/_utils.py b/nano_graphrag/_utils.py index ae772eb..17111ab 100644 --- a/nano_graphrag/_utils.py +++ b/nano_graphrag/_utils.py @@ -1,6 +1,6 @@ import asyncio import html -import json +import json5 import logging import os import re @@ -45,8 +45,8 @@ def extract_first_complete_json(s: str): first_json_str = s[first_json_start:i+1] try: # Attempt to parse the JSON string - return json.loads(first_json_str.replace("\n", "")) - except json.JSONDecodeError as e: + return json5.loads(first_json_str.replace("\n", "")) + except json5.JSONDecodeError as e: logger.error(f"JSON decoding failed: {e}. Attempted string: {first_json_str[:50]}...") return None finally: @@ -151,14 +151,14 @@ def compute_mdhash_id(content, prefix: str = ""): def write_json(json_obj, file_name): with open(file_name, "w", encoding="utf-8") as f: - json.dump(json_obj, f, indent=2, ensure_ascii=False) + json5.dump(json_obj, f, indent=2, ensure_ascii=False) def load_json(file_name): if not os.path.exists(file_name): return None with open(file_name, encoding="utf-8") as f: - return json.load(f) + return json5.load(f) # it's dirty to type, so it's a good way to have fun From 92bc4475f69a169c2a5bb18d566584b9d0d225f6 Mon Sep 17 00:00:00 2001 From: wuhongsheng <664116298@qq.com> Date: Mon, 11 Nov 2024 16:55:50 +0800 Subject: [PATCH 2/2] fix:JSON serialization problem --- nano_graphrag/_utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/nano_graphrag/_utils.py b/nano_graphrag/_utils.py index 17111ab..e44222a 100644 --- a/nano_graphrag/_utils.py +++ b/nano_graphrag/_utils.py @@ -1,6 +1,7 @@ import asyncio import html import json5 +import json import logging import os import re @@ -45,8 +46,8 @@ def extract_first_complete_json(s: str): first_json_str = s[first_json_start:i+1] try: # Attempt to parse the JSON string - return json5.loads(first_json_str.replace("\n", "")) - except json5.JSONDecodeError as e: + return json.loads(first_json_str.replace("\n", "")) + except json.JSONDecodeError as e: logger.error(f"JSON decoding failed: {e}. Attempted string: {first_json_str[:50]}...") return None finally: