From f7b8fd5298fbb4b9bfa4cca79a85e5e57026e96c Mon Sep 17 00:00:00 2001 From: Alex Garel Date: Fri, 2 May 2025 19:53:45 +0200 Subject: [PATCH 1/2] =?UTF-8?q?fix:=C2=A0do=20not=20account=20use=20stopwo?= =?UTF-8?q?rds=20for=20ids?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fixes: #540 --- backend/editor/controllers/node_controller.py | 5 +- backend/editor/entries.py | 6 +- backend/editor/models/node_models.py | 4 +- backend/tests/data/test.txt | 2 +- .../expected_results/test_delete_project.json | 2 +- .../test_upload_taxonomy.json | 923 ++++++++++++++---- backend/tests/test_export.py | 6 +- .../parser/taxonomy_parser.py | 12 +- parser/openfoodfacts_taxonomy_parser/utils.py | 10 +- parser/tests/data/test.txt | 2 +- .../test_parse_unparse_integration.py | 12 +- .../integration/test_parser_integration.py | 22 +- 12 files changed, 754 insertions(+), 252 deletions(-) diff --git a/backend/editor/controllers/node_controller.py b/backend/editor/controllers/node_controller.py index 0d020953..dd87e914 100644 --- a/backend/editor/controllers/node_controller.py +++ b/backend/editor/controllers/node_controller.py @@ -24,13 +24,12 @@ async def delete_project_nodes(project_id: str): async def create_entry_node( - project_id: str, entry_node: EntryNodeCreate, stopwords: dict[str, list[str]] -) -> str: + project_id: str, entry_node: EntryNodeCreate) -> str: """ Creates a new entry node in the database """ name, language_code = entry_node.name, entry_node.main_language_code - normalized_name = parser_utils.normalize_text(name, language_code, stopwords=stopwords) + normalized_name = parser_utils.normalize_text(name, language_code) # Create params dict entry_node_data = { diff --git a/backend/editor/entries.py b/backend/editor/entries.py index e132d7c4..e6f3881d 100644 --- a/backend/editor/entries.py +++ b/backend/editor/entries.py @@ -79,12 +79,9 @@ async def create_entry_node(self, name, main_language_code) -> str: """ Helper function used to create an entry node with given name and main language """ - stopwords = await self.get_stopwords_dict() - return await create_entry_node( self.project_name, EntryNodeCreate(name=name, main_language_code=main_language_code), - stopwords, ) async def get_local_taxonomy_file(self, tmpdir: str, uploadfile: UploadFile): @@ -571,8 +568,7 @@ async def update_node(self, label: NodeType, new_node: EntryNode): curr_node = EntryNode(**result[0]["n"]) # Recompute normalized tags ids corresponding to entry tags - stopwords = await self.get_stopwords_dict() - new_node.recompute_tags_ids(stopwords) + new_node.recompute_tags_ids() # Build query query = [f"""MATCH (n:{self.project_name}:{label.value}) WHERE n.id = $id """] diff --git a/backend/editor/models/node_models.py b/backend/editor/models/node_models.py index ea199069..72b2eb4f 100644 --- a/backend/editor/models/node_models.py +++ b/backend/editor/models/node_models.py @@ -55,7 +55,7 @@ def flat_dict(self) -> dict[str, Any]: del flat_data["comments"] return flat_data - def recompute_tags_ids(self, stopwords: dict[str, list[str]]): + def recompute_tags_ids(self): """Recompute the tags_ids dictionary based on the tags dictionary and the provided stopwords.""" self.tags_ids = {} @@ -65,7 +65,7 @@ def recompute_tags_ids(self, stopwords: dict[str, list[str]]): normalised_value = [] for value in values: normalised_value.append( - parser_utils.normalize_text(value, keys_language_code, stopwords=stopwords) + parser_utils.normalize_text(value, keys_language_code) ) self.tags_ids["tags_ids" + key[4:]] = normalised_value diff --git a/backend/tests/data/test.txt b/backend/tests/data/test.txt index f654e6f0..64ea19b9 100644 --- a/backend/tests/data/test.txt +++ b/backend/tests/data/test.txt @@ -42,7 +42,7 @@ description:fr: un yaourt avec du citron color:en: yellow flavour:en: lemon - str: """ raw_id = raw_id.strip() lc, main_tag = raw_id.split(":", 1) - normalized_main_tag = normalize_text(main_tag, lc, stopwords=self.stopwords) + normalized_main_tag = normalize_text(main_tag, lc) normalized_id = f"{lc}:{normalized_main_tag}" return normalized_id @@ -138,11 +138,11 @@ def undo_normalize_text(self, text: str) -> str: text = re.sub(r"\\‚", "\\,", text) return text - def _get_lc_value(self, line: str, remove_stopwords=True) -> tuple[str, list[str]]: + def _get_lc_value(self, line: str, remove_stopwords=False) -> tuple[str, list[str]]: """Get the language code "lc" and a list of values and normalized values""" lc, line = line.split(":", 1) values = [self.undo_normalize_text(word.strip()) for word in line.split(",")] - stopwords = self.stopwords if remove_stopwords else [] + stopwords = self.stopwords if remove_stopwords else None tags = [normalize_text(word, lc, stopwords=stopwords) for word in values] return lc, values, tags @@ -264,7 +264,7 @@ def _process_stopwords(self, data, line, line_number, index_stopwords): # remove "stopwords:" part line = line[10:] try: - lc, tags, tags_ids = self._get_lc_value(line, remove_stopwords=False) + lc, tags, tags_ids = self._get_lc_value(line) except ValueError: self.parser_logger.error( f"Missing language code at line {line_number + 1} ? " @@ -317,7 +317,7 @@ def _process_entry(self, data, line, comments): tagsids_list = [] for word in line.split(","): tags_list.append(self.undo_normalize_text(word.strip())) - word_normalized = normalize_text(word, lang, stopwords=self.stopwords) + word_normalized = normalize_text(word, lang) if word_normalized not in tagsids_list: # in case 2 normalized synonyms are the same tagsids_list.append(word_normalized) @@ -553,7 +553,7 @@ def _merge_duplicate_entry_nodes(self, entry_nodes: list[NodeData]) -> list[Node # because two tags can have the same normalized value language_code = key.split("_")[1] first_node.tags[f"tags_ids_{language_code}"] = [ - normalize_text(tag, language_code, stopwords=self.stopwords) + normalize_text(tag, language_code) for tag in first_node.tags[key] ] for key, value in node.properties.items(): diff --git a/parser/openfoodfacts_taxonomy_parser/utils.py b/parser/openfoodfacts_taxonomy_parser/utils.py index 9eaa7961..70174176 100644 --- a/parser/openfoodfacts_taxonomy_parser/utils.py +++ b/parser/openfoodfacts_taxonomy_parser/utils.py @@ -10,7 +10,14 @@ def normalize_text( char: str = "-", stopwords: dict[str, list[str]] | None = None, ) -> str: - """Normalize a string depending on the language code""" + """Normalize a string depending on the language code + + :param stopwords: associate a language code to a list of stopwords, + stopwords will be removed from the normalized text. + + It should only be used while we are trying to extend synonym matching. + Most of the time (computing entry id), you should not account for stopwords. + """ if stopwords is None: stopwords = {} @@ -44,6 +51,7 @@ def normalize_text( line = line.strip(char) # Remove stopwords + # Be careful, this must not be used to compute entry id if lang in stopwords: stopwords = stopwords[lang] line_surrounded_by_char = char + line + char diff --git a/parser/tests/data/test.txt b/parser/tests/data/test.txt index c501a4b5..242d699b 100644 --- a/parser/tests/data/test.txt +++ b/parser/tests/data/test.txt @@ -18,7 +18,7 @@ fr: yaourts à la banane en: Passion fruit yogurts fr: yaourts au fruit de la passion -< fr:yaourts fruit de la passion +< fr:yaourts au fruit de la passion fr: yaourts au fruit de la passion allégés # meat diff --git a/parser/tests/integration/test_parse_unparse_integration.py b/parser/tests/integration/test_parse_unparse_integration.py index e2420845..25a559a7 100644 --- a/parser/tests/integration/test_parse_unparse_integration.py +++ b/parser/tests/integration/test_parse_unparse_integration.py @@ -56,7 +56,7 @@ def test_round_trip(neo4j): if line.startswith("stopwords:fr: aux"): line = "stopwords:fr: aux, au, de, le, du, la, a, et, test normalisation" # second tweak: renaming parent - elif line.startswith("< fr:yaourts fruit de la passion"): + elif line.startswith("< fr:yaourts au fruit de la passion"): line = "< en:Passion fruit yogurts" # third tweak: commenting non existing parents elif line.startswith("< en:milk"): @@ -101,7 +101,7 @@ def test_two_branch_round_trip(neo4j): if line.startswith("stopwords:fr: aux"): line = "stopwords:fr: aux, au, de, le, du, la, a, et, test normalisation" # second tweak: renaming parent - elif line.startswith("< fr:yaourts fruit de la passion"): + elif line.startswith("< fr:yaourts au fruit de la passion"): line = "< en:Passion fruit yogurts" # third tweak: commenting non existing parents elif line.startswith("< en:milk"): @@ -142,7 +142,7 @@ def test_round_trip_with_external_taxonomies(neo4j): if line.startswith("stopwords:fr: aux"): line = "stopwords:fr: aux, au, de, le, du, la, a, et, test normalisation" # second tweak: renaming parent - elif line.startswith("< fr:yaourts fruit de la passion"): + elif line.startswith("< fr:yaourts au fruit de la passion"): line = "< en:Passion fruit yogurts" expected_lines.append(line) @@ -227,7 +227,7 @@ def test_patcher_with_modifications(neo4j): result = session.run( f""" MATCH (n:p_test_branch) - WHERE n.id = "fr:yaourts-fruit-passion-alleges" + WHERE n.id = "fr:yaourts-au-fruit-de-la-passion-alleges" SET n.modified = {modified} WITH n MATCH (m:p_test_branch) @@ -236,7 +236,7 @@ def test_patcher_with_modifications(neo4j): RETURN n.id, m.id """ ) - assert result.values() == [["fr:yaourts-fruit-passion-alleges", "en:yogurts"]] + assert result.values() == [["fr:yaourts-au-fruit-de-la-passion-alleges", "en:yogurts"]] # detach the node and set the node as REMOVED result = session.run( f""" @@ -346,7 +346,7 @@ def test_patcher_with_modifications(neo4j): for num, (line, next_line) in enumerate(zip(original_lines, original_lines[1:] + [None])): more_lines = [] # changed parent - if line.startswith("< fr:yaourts fruit de la passion"): + if line.startswith("< fr:yaourts au fruit de la passion"): line = "< en:yogurts" # no more parent elif line.startswith("< en:yogurts") and next_line.startswith("en: banana yogurts"): diff --git a/parser/tests/integration/test_parser_integration.py b/parser/tests/integration/test_parser_integration.py index 92f01bb4..0949174a 100644 --- a/parser/tests/integration/test_parser_integration.py +++ b/parser/tests/integration/test_parser_integration.py @@ -56,7 +56,7 @@ def test_calling(neo4j): { "id": "synonyms:1", "tags_fr": ["fruit de la passion", "maracuja", "passion"], - "tags_ids_fr": ["fruit-passion", "maracuja", "passion"], + "tags_ids_fr": ["fruit-de-la-passion", "maracuja", "passion"], "preceding_lines": [""], "src_position": 7, }, @@ -115,7 +115,7 @@ def test_calling(neo4j): "tags_en": ["banana yogurts"], "tags_ids_en": ["banana-yogurts"], "tags_fr": ["yaourts à la banane"], - "tags_ids_fr": ["yaourts-banane"], + "tags_ids_fr": ["yaourts-a-la-banane"], "preceding_lines": [], }, { @@ -146,7 +146,7 @@ def test_calling(neo4j): expected_pairs = [ ["en:banana-yogurts", "en:yogurts"], ["en:passion-fruit-yogurts", "en:yogurts"], - ["fr:yaourts-fruit-passion-alleges", "en:passion-fruit-yogurts"], + ["fr:yaourts-au-fruit-de-la-passion-alleges", "en:passion-fruit-yogurts"], ["en:fake-meat", "en:meat"], ["en:fake-duck-meat", "en:fake-meat"], ["en:fake-duck-meat", "en:fake-stuff"], @@ -173,8 +173,8 @@ def test_calling(neo4j): ["synonyms:1", "en:yogurts"], ["en:yogurts", "en:banana-yogurts"], ["en:banana-yogurts", "en:passion-fruit-yogurts"], - ["en:passion-fruit-yogurts", "fr:yaourts-fruit-passion-alleges"], - ["fr:yaourts-fruit-passion-alleges", "en:meat"], + ["en:passion-fruit-yogurts", "fr:yaourts-au-fruit-de-la-passion-alleges"], + ["fr:yaourts-au-fruit-de-la-passion-alleges", "en:meat"], ["en:meat", "en:fake-meat"], ["en:fake-meat", "en:fake-stuff"], ["en:fake-stuff", "en:fake-duck-meat"], @@ -220,7 +220,7 @@ def test_with_external_taxonomies(neo4j): { "id": "synonyms:1", "tags_fr": ["fruit de la passion", "maracuja", "passion"], - "tags_ids_fr": ["fruit-passion", "maracuja", "passion"], + "tags_ids_fr": ["fruit-de-la-passion", "maracuja", "passion"], "preceding_lines": [""], "src_position": 7, }, @@ -279,7 +279,7 @@ def test_with_external_taxonomies(neo4j): "tags_en": ["banana yogurts"], "tags_ids_en": ["banana-yogurts"], "tags_fr": ["yaourts à la banane"], - "tags_ids_fr": ["yaourts-banane"], + "tags_ids_fr": ["yaourts-a-la-banane"], "preceding_lines": [], }, { @@ -311,7 +311,7 @@ def test_with_external_taxonomies(neo4j): ["en:yogurts", "en:milk"], ["en:banana-yogurts", "en:yogurts"], ["en:passion-fruit-yogurts", "en:yogurts"], - ["fr:yaourts-fruit-passion-alleges", "en:passion-fruit-yogurts"], + ["fr:yaourts-au-fruit-de-la-passion-alleges", "en:passion-fruit-yogurts"], ["en:fake-meat", "en:meat"], ["en:fake-duck-meat", "en:fake-meat"], ["en:fake-duck-meat", "en:fake-stuff"], @@ -338,8 +338,8 @@ def test_with_external_taxonomies(neo4j): ["synonyms:1", "en:yogurts"], ["en:yogurts", "en:banana-yogurts"], ["en:banana-yogurts", "en:passion-fruit-yogurts"], - ["en:passion-fruit-yogurts", "fr:yaourts-fruit-passion-alleges"], - ["fr:yaourts-fruit-passion-alleges", "en:meat"], + ["en:passion-fruit-yogurts", "fr:yaourts-au-fruit-de-la-passion-alleges"], + ["fr:yaourts-au-fruit-de-la-passion-alleges", "en:meat"], ["en:meat", "en:fake-meat"], ["en:fake-meat", "en:fake-stuff"], ["en:fake-stuff", "en:fake-duck-meat"], @@ -406,7 +406,7 @@ def test_properties_confused_lang(neo4j, tmp_path): pathlib.Path(__file__).parent.parent / "data" / "test_property_confused_lang.txt" ) test_parser(fpath, None, "branch", "test") - query = "MATCH (n:p_test_branch) WHERE n.id = 'en:1-for-planet' RETURN n" + query = "MATCH (n:p_test_branch) WHERE n.id = 'en:1-for-the-planet' RETURN n" result = session.run(query) node = result.value()[0] # "web:en" was not confused with a language prefix "web:" From 931ce1d32874b7d2858cab61058800637dd3e4ca Mon Sep 17 00:00:00 2001 From: Alex Garel Date: Fri, 2 May 2025 19:55:51 +0200 Subject: [PATCH 2/2] chore: lint --- backend/editor/controllers/node_controller.py | 3 +- backend/editor/models/node_models.py | 4 +- .../expected_results/test_delete_project.json | 2 +- .../test_upload_taxonomy.json | 886 ++++-------------- .../parser/taxonomy_parser.py | 3 +- 5 files changed, 203 insertions(+), 695 deletions(-) diff --git a/backend/editor/controllers/node_controller.py b/backend/editor/controllers/node_controller.py index dd87e914..59ee27c9 100644 --- a/backend/editor/controllers/node_controller.py +++ b/backend/editor/controllers/node_controller.py @@ -23,8 +23,7 @@ async def delete_project_nodes(project_id: str): await get_current_transaction().run(query) -async def create_entry_node( - project_id: str, entry_node: EntryNodeCreate) -> str: +async def create_entry_node(project_id: str, entry_node: EntryNodeCreate) -> str: """ Creates a new entry node in the database """ diff --git a/backend/editor/models/node_models.py b/backend/editor/models/node_models.py index 72b2eb4f..2c8d3715 100644 --- a/backend/editor/models/node_models.py +++ b/backend/editor/models/node_models.py @@ -64,9 +64,7 @@ def recompute_tags_ids(self): keys_language_code = key.split("_", 1)[1] normalised_value = [] for value in values: - normalised_value.append( - parser_utils.normalize_text(value, keys_language_code) - ) + normalised_value.append(parser_utils.normalize_text(value, keys_language_code)) self.tags_ids["tags_ids" + key[4:]] = normalised_value def recompute_id(self): diff --git a/backend/tests/expected_results/test_delete_project.json b/backend/tests/expected_results/test_delete_project.json index 339881b3..eac4145a 100644 --- a/backend/tests/expected_results/test_delete_project.json +++ b/backend/tests/expected_results/test_delete_project.json @@ -1,4 +1,4 @@ { "nodes": [], "relations": [] -} \ No newline at end of file +} diff --git a/backend/tests/expected_results/test_upload_taxonomy.json b/backend/tests/expected_results/test_upload_taxonomy.json index 71622b4a..aa9da83c 100644 --- a/backend/tests/expected_results/test_upload_taxonomy.json +++ b/backend/tests/expected_results/test_upload_taxonomy.json @@ -2,82 +2,34 @@ "nodes": [ { "id": "__header__", - "labels": [ - "p_test_taxonomy_test_branch", - "TEXT" - ], - "preceding_lines": [ - "# test taxonomy" - ], - "src_lines": [ - "1,1" - ], + "labels": ["p_test_taxonomy_test_branch", "TEXT"], + "preceding_lines": ["# test taxonomy"], + "src_lines": ["1,1"], "src_position": 1 }, { "id": "stopwords:0", - "labels": [ - "STOPWORDS", - "p_test_taxonomy_test_branch" - ], + "labels": ["STOPWORDS", "p_test_taxonomy_test_branch"], "preceding_lines": [], - "src_lines": [ - "3,3" - ], + "src_lines": ["3,3"], "src_position": 3, - "tags_fr": [ - "aux", - "au", - "de", - "le", - "du", - "la", - "a", - "et" - ], - "tags_ids_fr": [ - "aux", - "au", - "de", - "le", - "du", - "la", - "a", - "et" - ] + "tags_fr": ["aux", "au", "de", "le", "du", "la", "a", "et"], + "tags_ids_fr": ["aux", "au", "de", "le", "du", "la", "a", "et"] }, { "id": "synonyms:0", - "labels": [ - "SYNONYMS", - "p_test_taxonomy_test_branch" - ], + "labels": ["SYNONYMS", "p_test_taxonomy_test_branch"], "preceding_lines": [], - "src_lines": [ - "5,5" - ], + "src_lines": ["5,5"], "src_position": 5, - "tags_en": [ - "passion fruit", - "passionfruit" - ], - "tags_ids_en": [ - "passion-fruit", - "passionfruit" - ] + "tags_en": ["passion fruit", "passionfruit"], + "tags_ids_en": ["passion-fruit", "passionfruit"] }, { "id": "synonyms:1", - "labels": [ - "SYNONYMS", - "p_test_taxonomy_test_branch" - ], - "preceding_lines": [ - "" - ], - "src_lines": [ - "6,7" - ], + "labels": ["SYNONYMS", "p_test_taxonomy_test_branch"], + "preceding_lines": [""], + "src_lines": ["6,7"], "src_position": 7, "tags_fr": [ "fruit de la passion", @@ -95,10 +47,7 @@ { "id": "en:yogurts", "is_external": false, - "labels": [ - "ENTRY", - "p_test_taxonomy_test_branch" - ], + "labels": ["ENTRY", "p_test_taxonomy_test_branch"], "main_language": "en", "original_taxonomy": "test.txt", "preceding_lines": [], @@ -106,42 +55,19 @@ "prop_description_en": "a yogurts of whatever type", "prop_description_fr": "un yaourt de n'importe quel type", "prop_flavour_en": "undef", - "src_lines": [ - "9,15" - ], + "src_lines": ["9,15"], "src_position": 9, - "tags_en": [ - "yogurts", - "yoghurts" - ], - "tags_fr": [ - "yaourts", - "yoghourts", - "yogourts" - ], - "tags_ids_en": [ - "yogurts", - "yoghurts" - ], - "tags_ids_fr": [ - "yaourts", - "yoghourts", - "yogourts" - ], - "tags_ids_nl": [ - "yoghurts" - ], - "tags_nl": [ - "yoghurts" - ] + "tags_en": ["yogurts", "yoghurts"], + "tags_fr": ["yaourts", "yoghourts", "yogourts"], + "tags_ids_en": ["yogurts", "yoghurts"], + "tags_ids_fr": ["yaourts", "yoghourts", "yogourts"], + "tags_ids_nl": ["yoghurts"], + "tags_nl": ["yoghurts"] }, { "id": "en:banana-yogurts", "is_external": false, - "labels": [ - "ENTRY", - "p_test_taxonomy_test_branch" - ], + "labels": ["ENTRY", "p_test_taxonomy_test_branch"], "main_language": "en", "original_taxonomy": "test.txt", "preceding_lines": [], @@ -149,92 +75,49 @@ "prop_description_en": "a banana yogurt", "prop_description_fr": "un yaourt \u00e0 la banane", "prop_flavour_en": "banana", - "src_lines": [ - "17,24" - ], + "src_lines": ["17,24"], "src_position": 17, - "tags_en": [ - "banana yogurts" - ], - "tags_fr": [ - "yaourts \u00e0 la banane" - ], - "tags_ids_en": [ - "banana-yogurts" - ], - "tags_ids_fr": [ - "yaourts-a-la-banane" - ], - "tags_ids_nl": [ - "bananenyoghurt" - ], - "tags_nl": [ - "bananenyoghurt" - ] + "tags_en": ["banana yogurts"], + "tags_fr": ["yaourts \u00e0 la banane"], + "tags_ids_en": ["banana-yogurts"], + "tags_ids_fr": ["yaourts-a-la-banane"], + "tags_ids_nl": ["bananenyoghurt"], + "tags_nl": ["bananenyoghurt"] }, { "id": "en:passion-fruit-yogurts", "is_external": false, - "labels": [ - "ENTRY", - "p_test_taxonomy_test_branch" - ], + "labels": ["ENTRY", "p_test_taxonomy_test_branch"], "main_language": "en", "original_taxonomy": "test.txt", "preceding_lines": [], "prop_color_en": "undef", "prop_flavour_en": "passion fruit", - "src_lines": [ - "26,31" - ], + "src_lines": ["26,31"], "src_position": 26, - "tags_en": [ - "Passion fruit yogurts" - ], - "tags_fr": [ - "yaourts au fruit de la passion" - ], - "tags_ids_en": [ - "passion-fruit-yogurts" - ], - "tags_ids_fr": [ - "yaourts-au-fruit-de-la-passion" - ], - "tags_ids_nl": [ - "yoghurts-met-passievrucht" - ], - "tags_nl": [ - "yoghurts met passievrucht" - ] + "tags_en": ["Passion fruit yogurts"], + "tags_fr": ["yaourts au fruit de la passion"], + "tags_ids_en": ["passion-fruit-yogurts"], + "tags_ids_fr": ["yaourts-au-fruit-de-la-passion"], + "tags_ids_nl": ["yoghurts-met-passievrucht"], + "tags_nl": ["yoghurts met passievrucht"] }, { "id": "fr:yaourts-alleges", "is_external": false, - "labels": [ - "ENTRY", - "p_test_taxonomy_test_branch" - ], + "labels": ["ENTRY", "p_test_taxonomy_test_branch"], "main_language": "fr", "original_taxonomy": "test.txt", "preceding_lines": [], - "src_lines": [ - "33,34" - ], + "src_lines": ["33,34"], "src_position": 33, - "tags_fr": [ - "yaourts all\u00e9g\u00e9s" - ], - "tags_ids_fr": [ - "yaourts-alleges" - ] + "tags_fr": ["yaourts all\u00e9g\u00e9s"], + "tags_ids_fr": ["yaourts-alleges"] }, { "id": "en:lemon-yogurts", "is_external": false, - "labels": [ - "ENTRY", - "p_test_taxonomy_test_branch" - ], + "labels": ["ENTRY", "p_test_taxonomy_test_branch"], "main_language": "en", "original_taxonomy": "test.txt", "preceding_lines": [], @@ -242,191 +125,104 @@ "prop_description_en": "a yogurts with lemon inside", "prop_description_fr": "un yaourt avec du citron", "prop_flavour_en": "lemon", - "src_lines": [ - "36,43" - ], + "src_lines": ["36,43"], "src_position": 36, - "tags_en": [ - "lemon yogurts" - ], - "tags_fr": [ - "yaourts au citron" - ], - "tags_ids_en": [ - "lemon-yogurts" - ], - "tags_ids_fr": [ - "yaourts-au-citron" - ], - "tags_ids_nl": [ - "yoghurts-met-citroen" - ], - "tags_nl": [ - "yoghurts met citroen" - ] + "tags_en": ["lemon yogurts"], + "tags_fr": ["yaourts au citron"], + "tags_ids_en": ["lemon-yogurts"], + "tags_ids_fr": ["yaourts-au-citron"], + "tags_ids_nl": ["yoghurts-met-citroen"], + "tags_nl": ["yoghurts met citroen"] }, { "id": "fr:yaourts-au-fruit-de-la-passion-alleges", "is_external": false, - "labels": [ - "ENTRY", - "p_test_taxonomy_test_branch" - ], + "labels": ["ENTRY", "p_test_taxonomy_test_branch"], "main_language": "fr", "original_taxonomy": "test.txt", "preceding_lines": [], - "src_lines": [ - "45,48" - ], + "src_lines": ["45,48"], "src_position": 45, - "tags_fr": [ - "yaourts au fruit de la passion all\u00e9g\u00e9s" - ], - "tags_ids_fr": [ - "yaourts-au-fruit-de-la-passion-alleges" - ], - "tags_ids_nl": [ - "magere-yoghurts-met-passievrucht" - ], - "tags_nl": [ - "magere yoghurts met passievrucht" - ] + "tags_fr": ["yaourts au fruit de la passion all\u00e9g\u00e9s"], + "tags_ids_fr": ["yaourts-au-fruit-de-la-passion-alleges"], + "tags_ids_nl": ["magere-yoghurts-met-passievrucht"], + "tags_nl": ["magere yoghurts met passievrucht"] }, { "id": "fr:yaourts-au-citron-alleges", "is_external": false, - "labels": [ - "ENTRY", - "p_test_taxonomy_test_branch" - ], + "labels": ["ENTRY", "p_test_taxonomy_test_branch"], "main_language": "fr", "original_taxonomy": "test.txt", - "preceding_lines": [ - "" - ], + "preceding_lines": [""], "prop_description_en": "for light yogurts with lemon", - "src_lines": [ - "50,55" - ], + "src_lines": ["50,55"], "src_position": 51, - "tags_fr": [ - "yaourts au citron all\u00e9g\u00e9s" - ], - "tags_ids_fr": [ - "yaourts-au-citron-alleges" - ], - "tags_ids_nl": [ - "magere-citroenyoghurt" - ], - "tags_nl": [ - "magere citroenyoghurt" - ] + "tags_fr": ["yaourts au citron all\u00e9g\u00e9s"], + "tags_ids_fr": ["yaourts-au-citron-alleges"], + "tags_ids_nl": ["magere-citroenyoghurt"], + "tags_nl": ["magere citroenyoghurt"] }, { "id": "fr:yaourts-a-la-myrtille", "is_external": false, - "labels": [ - "ENTRY", - "p_test_taxonomy_test_branch" - ], + "labels": ["ENTRY", "p_test_taxonomy_test_branch"], "main_language": "fr", "original_taxonomy": "test.txt", - "parent_comments": [ - "# < fr:yaourt" - ], + "parent_comments": ["# < fr:yaourt"], "preceding_lines": [], "prop_flavour_en": "blueberry", "prop_flavour_fr": "myrtille", - "src_lines": [ - "57,61" - ], + "src_lines": ["57,61"], "src_position": 57, - "tags_fr": [ - "yaourts \u00e0 la myrtille" - ], - "tags_ids_fr": [ - "yaourts-a-la-myrtille" - ], - "tags_ids_nl": [ - "bosbessenyoghurt" - ], - "tags_nl": [ - "bosbessenyoghurt" - ] + "tags_fr": ["yaourts \u00e0 la myrtille"], + "tags_ids_fr": ["yaourts-a-la-myrtille"], + "tags_ids_nl": ["bosbessenyoghurt"], + "tags_nl": ["bosbessenyoghurt"] }, { "id": "en:meat", "is_external": false, - "labels": [ - "ENTRY", - "p_test_taxonomy_test_branch" - ], + "labels": ["ENTRY", "p_test_taxonomy_test_branch"], "main_language": "en", "original_taxonomy": "test.txt", "preceding_lines": [], "prop_carbon_footprint_fr_foodges_value_fr": "10", "prop_vegan_en": "no", - "src_lines": [ - "63,65" - ], + "src_lines": ["63,65"], "src_position": 63, - "tags_en": [ - "meat" - ], - "tags_ids_en": [ - "meat" - ] + "tags_en": ["meat"], + "tags_ids_en": ["meat"] }, { "id": "en:beef", "is_external": false, - "labels": [ - "ENTRY", - "p_test_taxonomy_test_branch" - ], + "labels": ["ENTRY", "p_test_taxonomy_test_branch"], "main_language": "en", "original_taxonomy": "test.txt", "preceding_lines": [], "prop_carbon_footprint_fr_foodges_value_fr": "15", - "src_lines": [ - "67,69" - ], + "src_lines": ["67,69"], "src_position": 67, - "tags_en": [ - "beef" - ], - "tags_ids_en": [ - "beef" - ] + "tags_en": ["beef"], + "tags_ids_en": ["beef"] }, { "id": "en:roast-beef", "is_external": false, - "labels": [ - "ENTRY", - "p_test_taxonomy_test_branch" - ], + "labels": ["ENTRY", "p_test_taxonomy_test_branch"], "main_language": "en", "original_taxonomy": "test.txt", "preceding_lines": [], - "src_lines": [ - "71,72" - ], + "src_lines": ["71,72"], "src_position": 71, - "tags_en": [ - "roast-beef" - ], - "tags_ids_en": [ - "roast-beef" - ] + "tags_en": ["roast-beef"], + "tags_ids_en": ["roast-beef"] }, { "id": "en:fake-meat", "is_external": false, - "labels": [ - "ENTRY", - "p_test_taxonomy_test_branch" - ], + "labels": ["ENTRY", "p_test_taxonomy_test_branch"], "main_language": "en", "original_taxonomy": "test.txt", "preceding_lines": [], @@ -435,104 +231,62 @@ "# undef will stop parents from transmitting a value" ], "prop_vegan_en": "yes", - "src_lines": [ - "74,78" - ], + "src_lines": ["74,78"], "src_position": 74, - "tags_en": [ - "fake-meat" - ], - "tags_ids_en": [ - "fake-meat" - ] + "tags_en": ["fake-meat"], + "tags_ids_en": ["fake-meat"] }, { "id": "en:fake-stuff", "is_external": false, - "labels": [ - "ENTRY", - "p_test_taxonomy_test_branch" - ], + "labels": ["ENTRY", "p_test_taxonomy_test_branch"], "main_language": "en", "original_taxonomy": "test.txt", "preceding_lines": [], - "src_lines": [ - "80,80" - ], + "src_lines": ["80,80"], "src_position": 80, - "tags_en": [ - "fake-stuff" - ], - "tags_ids_en": [ - "fake-stuff" - ] + "tags_en": ["fake-stuff"], + "tags_ids_en": ["fake-stuff"] }, { "id": "en:fake-duck-meat", "is_external": false, - "labels": [ - "ENTRY", - "p_test_taxonomy_test_branch" - ], + "labels": ["ENTRY", "p_test_taxonomy_test_branch"], "main_language": "en", "original_taxonomy": "test.txt", "preceding_lines": [], - "src_lines": [ - "82,84" - ], + "src_lines": ["82,84"], "src_position": 82, - "tags_en": [ - "fake-duck-meat" - ], - "tags_ids_en": [ - "fake-duck-meat" - ] + "tags_en": ["fake-duck-meat"], + "tags_ids_en": ["fake-duck-meat"] }, { "id": "en:vegetable", "is_external": false, - "labels": [ - "ENTRY", - "p_test_taxonomy_test_branch" - ], + "labels": ["ENTRY", "p_test_taxonomy_test_branch"], "main_language": "en", "original_taxonomy": "test.txt", "preceding_lines": [], "prop_vegan_en": "yes", - "src_lines": [ - "86,87" - ], + "src_lines": ["86,87"], "src_position": 86, - "tags_en": [ - "vegetable" - ], - "tags_ids_en": [ - "vegetable" - ] + "tags_en": ["vegetable"], + "tags_ids_en": ["vegetable"] }, { "id": "en:soup", "is_external": false, - "labels": [ - "ENTRY", - "p_test_taxonomy_test_branch" - ], + "labels": ["ENTRY", "p_test_taxonomy_test_branch"], "main_language": "en", "original_taxonomy": "test.txt", "preceding_lines": [ "# the soup yogourt synonym is used to test suggestions matching xx: synonyms" ], "prop_vegan_en": "maybe", - "src_lines": [ - "89,92" - ], + "src_lines": ["89,92"], "src_position": 90, - "tags_en": [ - "soup" - ], - "tags_ids_en": [ - "soup" - ], + "tags_en": ["soup"], + "tags_ids_en": ["soup"], "tags_ids_xx": [ "something-that-means-soup-in-every-language", "something-else-that-means-soup-in-every-language" @@ -545,130 +299,71 @@ { "id": "en:vegan-soup", "is_external": false, - "labels": [ - "ENTRY", - "p_test_taxonomy_test_branch" - ], + "labels": ["ENTRY", "p_test_taxonomy_test_branch"], "main_language": "en", "original_taxonomy": "test.txt", "preceding_lines": [], "prop_vegan_en": "yes", - "src_lines": [ - "94,96" - ], + "src_lines": ["94,96"], "src_position": 94, - "tags_en": [ - "vegan-soup" - ], - "tags_ids_en": [ - "vegan-soup" - ] + "tags_en": ["vegan-soup"], + "tags_ids_en": ["vegan-soup"] }, { "id": "en:fish-soup", "is_external": false, - "labels": [ - "ENTRY", - "p_test_taxonomy_test_branch" - ], + "labels": ["ENTRY", "p_test_taxonomy_test_branch"], "main_language": "en", "original_taxonomy": "test.txt", "preceding_lines": [], "prop_vegan_en": "no", - "src_lines": [ - "98,100" - ], + "src_lines": ["98,100"], "src_position": 98, - "tags_en": [ - "fish-soup" - ], - "tags_ids_en": [ - "fish-soup" - ] + "tags_en": ["fish-soup"], + "tags_ids_en": ["fish-soup"] }, { "id": "de:sp\u00e4tzle", "is_external": false, - "labels": [ - "ENTRY", - "p_test_taxonomy_test_branch" - ], + "labels": ["ENTRY", "p_test_taxonomy_test_branch"], "main_language": "de", "original_taxonomy": "test.txt", "preceding_lines": [], - "src_lines": [ - "102,102" - ], + "src_lines": ["102,102"], "src_position": 102, - "tags_de": [ - "Sp\u00e4tzle" - ], - "tags_ids_de": [ - "sp\u00e4tzle" - ] + "tags_de": ["Sp\u00e4tzle"], + "tags_ids_de": ["sp\u00e4tzle"] }, { "id": "en:kale", "is_external": false, - "labels": [ - "ENTRY", - "p_test_taxonomy_test_branch" - ], + "labels": ["ENTRY", "p_test_taxonomy_test_branch"], "main_language": "en", "original_taxonomy": "test.txt", "preceding_lines": [], - "src_lines": [ - "104,105" - ], + "src_lines": ["104,105"], "src_position": 104, - "tags_de": [ - "Gr\u00fcnkohl" - ], - "tags_en": [ - "Kale" - ], - "tags_ids_de": [ - "gr\u00fcnkohl" - ], - "tags_ids_en": [ - "kale" - ] + "tags_de": ["Gr\u00fcnkohl"], + "tags_en": ["Kale"], + "tags_ids_de": ["gr\u00fcnkohl"], + "tags_ids_en": ["kale"] }, { "id": "en:kefir-2.5", "is_external": false, - "labels": [ - "ENTRY", - "p_test_taxonomy_test_branch" - ], + "labels": ["ENTRY", "p_test_taxonomy_test_branch"], "main_language": "en", "original_taxonomy": "test.txt", "preceding_lines": [], - "src_lines": [ - "107,110" - ], + "src_lines": ["107,110"], "src_position": 107, - "tags_de": [ - "Kefir 2.5 %" - ], - "tags_en": [ - "Kefir 2.5 %" - ], - "tags_fr": [ - "K\u00e9fir 2,5 %" - ], - "tags_ids_de": [ - "kefir-2.5" - ], - "tags_ids_en": [ - "kefir-2.5" - ], - "tags_ids_fr": [ - "kefir-2-5" - ], - "tags_ids_ru": [ - "\u043a\u0435\u0444\u0438\u0440-2.5" - ], + "tags_de": ["Kefir 2.5 %"], + "tags_en": ["Kefir 2.5 %"], + "tags_fr": ["K\u00e9fir 2,5 %"], + "tags_ids_de": ["kefir-2.5"], + "tags_ids_en": ["kefir-2.5"], + "tags_ids_fr": ["kefir-2-5"], + "tags_ids_ru": ["\u043a\u0435\u0444\u0438\u0440-2.5"], "tags_ru": [ "\u041a\u0435\u0444\u0438\u0440 2.5 %", "\u041a\u0435\u0444\u0438\u0440 2.5%" @@ -677,141 +372,77 @@ { "id": "fr:french-entry", "is_external": false, - "labels": [ - "ENTRY", - "p_test_taxonomy_test_branch" - ], + "labels": ["ENTRY", "p_test_taxonomy_test_branch"], "main_language": "fr", "original_taxonomy": "test.txt", "preceding_lines": [], - "src_lines": [ - "112,113" - ], + "src_lines": ["112,113"], "src_position": 112, - "tags_de": [ - "Special value for German" - ], - "tags_fr": [ - "French entry" - ], - "tags_ids_de": [ - "special-value-for-german" - ], - "tags_ids_fr": [ - "french-entry" - ] + "tags_de": ["Special value for German"], + "tags_fr": ["French entry"], + "tags_ids_de": ["special-value-for-german"], + "tags_ids_fr": ["french-entry"] }, { "id": "fr:french-entry-with-default-value", "is_external": false, - "labels": [ - "ENTRY", - "p_test_taxonomy_test_branch" - ], + "labels": ["ENTRY", "p_test_taxonomy_test_branch"], "main_language": "fr", "original_taxonomy": "test.txt", "preceding_lines": [], - "src_lines": [ - "115,117" - ], + "src_lines": ["115,117"], "src_position": 115, - "tags_de": [ - "Special value for German 2" - ], - "tags_fr": [ - "French entry with default value" - ], - "tags_ids_de": [ - "special-value-for-german-2" - ], - "tags_ids_fr": [ - "french-entry-with-default-value" - ], - "tags_ids_xx": [ - "french-entry-with-default-value" - ], - "tags_xx": [ - "French entry with default value" - ] + "tags_de": ["Special value for German 2"], + "tags_fr": ["French entry with default value"], + "tags_ids_de": ["special-value-for-german-2"], + "tags_ids_fr": ["french-entry-with-default-value"], + "tags_ids_xx": ["french-entry-with-default-value"], + "tags_xx": ["French entry with default value"] }, { "id": "xx:language-less-entry", "is_external": false, - "labels": [ - "ENTRY", - "p_test_taxonomy_test_branch" - ], + "labels": ["ENTRY", "p_test_taxonomy_test_branch"], "main_language": "xx", "original_taxonomy": "test.txt", "preceding_lines": [], - "src_lines": [ - "119,120" - ], + "src_lines": ["119,120"], "src_position": 119, - "tags_de": [ - "Special value for German 3" - ], - "tags_ids_de": [ - "special-value-for-german-3" - ], - "tags_ids_xx": [ - "language-less-entry" - ], - "tags_xx": [ - "Language-less entry" - ] + "tags_de": ["Special value for German 3"], + "tags_ids_de": ["special-value-for-german-3"], + "tags_ids_xx": ["language-less-entry"], + "tags_xx": ["Language-less entry"] }, { "id": "sv:\u00e4-m\u00e4rket", "is_external": false, - "labels": [ - "ENTRY", - "p_test_taxonomy_test_branch" - ], + "labels": ["ENTRY", "p_test_taxonomy_test_branch"], "main_language": "sv", "original_taxonomy": "test.txt", "preceding_lines": [ "# xx: entry with accents, need to match unaccented version" ], - "src_lines": [ - "122,124" - ], + "src_lines": ["122,124"], "src_position": 123, - "tags_ids_sv": [ - "\u00e4-m\u00e4rket" - ], - "tags_ids_xx": [ - "\u00e4-m\u00e4rket" - ], - "tags_sv": [ - "\u00c4-m\u00e4rket" - ], - "tags_xx": [ - "\u00c4-m\u00e4rket" - ] + "tags_ids_sv": ["\u00e4-m\u00e4rket"], + "tags_ids_xx": ["\u00e4-m\u00e4rket"], + "tags_sv": ["\u00c4-m\u00e4rket"], + "tags_xx": ["\u00c4-m\u00e4rket"] }, { "id": "__footer__", - "labels": [ - "p_test_taxonomy_test_branch", - "TEXT" - ], + "labels": ["p_test_taxonomy_test_branch", "TEXT"], "preceding_lines": [], "src_position": 125 }, { "id": "en:entry-with-parentheses-and-some-characters", "is_external": false, - "labels": [ - "ENTRY", - "p_test_taxonomy_test_branch" - ], + "labels": ["ENTRY", "p_test_taxonomy_test_branch"], "main_language": "en", "original_taxonomy": "test.txt", "preceding_lines": [], - "src_lines": [ - "126,124" - ], + "src_lines": ["126,124"], "src_position": 126, "tags_en": [ "Entry with (parentheses) and some *!#{}@$ characters", @@ -829,9 +460,7 @@ "errors_count": 1, "id": "p_test_taxonomy_test_branch", "is_from_github": false, - "labels": [ - "PROJECT" - ], + "labels": ["PROJECT"], "original_text": "# test taxonomy\n\nstopwords:fr: aux,au,de,le,du,la,a,et\n\nsynonyms:en: passion fruit, passionfruit\n\nsynonyms:fr: fruit de la passion, fruits de la passion, maracuja, passion\n\nen: yogurts, yoghurts\nfr: yaourts, yoghourts, yogourts\nnl: yoghurts\ndescription:en: a yogurts of whatever type\ndescription:fr: un yaourt de n'importe quel type\ncolor:en: white\nflavour:en: undef\n\n list[Node # because two tags can have the same normalized value language_code = key.split("_")[1] first_node.tags[f"tags_ids_{language_code}"] = [ - normalize_text(tag, language_code) - for tag in first_node.tags[key] + normalize_text(tag, language_code) for tag in first_node.tags[key] ] for key, value in node.properties.items(): # overwrite the value if the property already exists in the first node