diff --git a/.github/workflows/github-projects-for-openfoodfacts-design.yml b/.github/workflows/github-projects-for-openfoodfacts-design.yml index f1de1463..cc99c59f 100644 --- a/.github/workflows/github-projects-for-openfoodfacts-design.yml +++ b/.github/workflows/github-projects-for-openfoodfacts-design.yml @@ -59,13 +59,13 @@ jobs: project-url: https://github.com/orgs/openfoodfacts/projects/5 # Add issue to the folksonomy project github-token: ${{ secrets.ADD_TO_PROJECT_PAT }} labeled: 🏷️ Folksonomy Project - label-operator: OR + label-operator: OR - uses: actions/add-to-project@main with: project-url: https://github.com/orgs/openfoodfacts/projects/44 # Add issue to the data quality project github-token: ${{ secrets.ADD_TO_PROJECT_PAT }} labeled: 🧽 Data quality - label-operator: OR + label-operator: OR - uses: actions/add-to-project@main with: project-url: https://github.com/orgs/openfoodfacts/projects/82 # Add issue to the search project @@ -77,19 +77,19 @@ jobs: project-url: https://github.com/orgs/openfoodfacts/projects/41 # Add issue to the producer platform project github-token: ${{ secrets.ADD_TO_PROJECT_PAT }} labeled: 🏭 Producers Platform - label-operator: OR + label-operator: OR - uses: actions/add-to-project@main with: project-url: https://github.com/orgs/openfoodfacts/projects/19 # Add issue to the infrastructure project github-token: ${{ secrets.ADD_TO_PROJECT_PAT }} labeled: infrastructure - label-operator: OR + label-operator: OR - uses: actions/add-to-project@main with: project-url: https://github.com/orgs/openfoodfacts/projects/92 # Add issue to the Nutri-Score project github-token: ${{ secrets.ADD_TO_PROJECT_PAT }} labeled: 🚦 Nutri-Score - label-operator: OR + label-operator: OR - uses: actions/add-to-project@main with: project-url: https://github.com/orgs/openfoodfacts/projects/132 # Add issue to the Top upvoted issues board @@ -107,4 +107,4 @@ jobs: project-url: https://github.com/orgs/openfoodfacts/projects/35 # Add issue to the ♿️ 
accessibility project github-token: ${{ secrets.ADD_TO_PROJECT_PAT }} labeled: ♿️ accessibility - label-operator: OR + label-operator: OR diff --git a/Makefile b/Makefile index 72b8c2fe..e730c36c 100644 --- a/Makefile +++ b/Makefile @@ -99,9 +99,14 @@ local_config_quality: ## Run on lint configuration files build: ## Build docker images @echo "🍜 Building docker images" - ${DOCKER_COMPOSE} build + ${DOCKER_COMPOSE} build ${args} @echo "🍜 Project setup done" +backend_poetry_update: ## Update poetry.lock file + @echo "🍜 Updating poetry.lock file" + ${DOCKER_COMPOSE} run --user root --rm taxonomy_api bash -c "pip install poetry==1.4.2 && poetry lock --no-update" + + up: ## Run the project @echo "🍜 Running project (ctrl+C to stop)" @echo "🍜 The React app will be available on http://ui.taxonomy.localhost:8091" @@ -177,11 +182,11 @@ config_quality: ## Run quality checks on configuration files tests: backend_tests ## Run all tests -backend_tests: ## Run python tests +backend_tests: ## Run python tests, you might provide additional arguments with args="…" @echo "🍜 Running python tests" ${DOCKER_COMPOSE_TEST} up -d neo4j - ${DOCKER_COMPOSE_TEST} run --rm taxonomy_api pytest /parser - ${DOCKER_COMPOSE_TEST} run --rm taxonomy_api pytest /code/tests + ${DOCKER_COMPOSE_TEST} run --rm taxonomy_api pytest /parser ${args} + ${DOCKER_COMPOSE_TEST} run --rm taxonomy_api pytest /code/tests ${args} ${DOCKER_COMPOSE_TEST} stop neo4j checks: quality tests ## Run all checks (quality + tests) diff --git a/backend/editor/api.py b/backend/editor/api.py index 50f721ea..962be4a2 100644 --- a/backend/editor/api.py +++ b/backend/editor/api.py @@ -184,7 +184,7 @@ async def find_all_nodes(response: Response, branch: str, taxonomy_name: str): Get all nodes within taxonomy """ taxonomy = TaxonomyGraph(branch, taxonomy_name) - all_nodes = await taxonomy.get_all_nodes("") + all_nodes = await taxonomy.get_all_nodes() return all_nodes @@ -235,7 +235,7 @@ async def find_one_synonym(response: Response, 
branch: str, taxonomy_name: str, Get synonym corresponding to id within taxonomy """ taxonomy = TaxonomyGraph(branch, taxonomy_name) - one_synonym = await taxonomy.get_nodes("SYNONYMS", synonym) + one_synonym = await taxonomy.get_nodes(NodeType.SYNONYMS, synonym) check_single(one_synonym) @@ -248,7 +248,7 @@ async def find_all_synonyms(response: Response, branch: str, taxonomy_name: str) Get all synonyms within taxonomy """ taxonomy = TaxonomyGraph(branch, taxonomy_name) - all_synonyms = await taxonomy.get_all_nodes("SYNONYMS") + all_synonyms = await taxonomy.get_all_nodes(NodeType.SYNONYMS) return all_synonyms @@ -258,7 +258,7 @@ async def find_one_stopword(response: Response, branch: str, taxonomy_name: str, Get stopword corresponding to id within taxonomy """ taxonomy = TaxonomyGraph(branch, taxonomy_name) - one_stopword = await taxonomy.get_nodes("STOPWORDS", stopword) + one_stopword = await taxonomy.get_nodes(NodeType.STOPWORDS, stopword) check_single(one_stopword) @@ -271,7 +271,7 @@ async def find_all_stopwords(response: Response, branch: str, taxonomy_name: str Get all stopwords within taxonomy """ taxonomy = TaxonomyGraph(branch, taxonomy_name) - all_stopwords = await taxonomy.get_all_nodes("STOPWORDS") + all_stopwords = await taxonomy.get_all_nodes(NodeType.STOPWORDS) return all_stopwords @@ -281,7 +281,7 @@ async def find_header(response: Response, branch: str, taxonomy_name: str): Get __header__ within taxonomy """ taxonomy = TaxonomyGraph(branch, taxonomy_name) - header = await taxonomy.get_nodes("TEXT", "__header__") + header = await taxonomy.get_nodes(NodeType.TEXT, "__header__") return header[0] @@ -291,7 +291,7 @@ async def find_footer(response: Response, branch: str, taxonomy_name: str): Get __footer__ within taxonomy """ taxonomy = TaxonomyGraph(branch, taxonomy_name) - footer = await taxonomy.get_nodes("TEXT", "__footer__") + footer = await taxonomy.get_nodes(NodeType.TEXT, "__footer__") return footer[0] @@ -395,7 +395,7 @@ async def 
upload_taxonomy( """ taxonomy = TaxonomyGraph(branch, taxonomy_name) if not taxonomy.is_valid_branch_name(): - raise HTTPException(status_code=422, detail="branch_name: Enter a valid branch name!") + raise HTTPException(status_code=422, detail="branch_name: Enter a valid branch name!") if await taxonomy.does_project_exist(): raise HTTPException(status_code=409, detail="Project already exists!") if not await taxonomy.is_branch_unique(from_github=False): @@ -431,7 +431,7 @@ async def edit_entry(request: Request, branch: str, taxonomy_name: str, entry: s incoming_data = await request.json() incoming_data["id"] = entry new_entry = EntryNode(**incoming_data) - updated_entry = await taxonomy.update_node("ENTRY", new_entry) + updated_entry = await taxonomy.update_node(NodeType.ENTRY, new_entry) return updated_entry @@ -467,5 +467,5 @@ async def delete_project(branch: str, taxonomy_name: str): """ Delete a project """ - taxonomy = TaxonomyGraph(branch, taxonomy_name) - await project_controller.delete_project(taxonomy.project_name) + project_id = project_controller.get_project_id(branch, taxonomy_name) + await project_controller.delete_project(project_id) diff --git a/backend/editor/controllers/node_controller.py b/backend/editor/controllers/node_controller.py index fb33ab4a..0d020953 100644 --- a/backend/editor/controllers/node_controller.py +++ b/backend/editor/controllers/node_controller.py @@ -1,3 +1,4 @@ +import datetime import logging from openfoodfacts_taxonomy_parser import utils as parser_utils @@ -12,7 +13,7 @@ async def delete_project_nodes(project_id: str): """ Remove all nodes for project. 
- This includes entries, stopwords, synonyms and errors + This includes entries, stopwords, synonyms, errors and removed entries """ query = f""" @@ -38,6 +39,8 @@ async def create_entry_node( "id": language_code + ":" + normalized_name, f"tags_{language_code}": [name], f"tags_ids_{language_code}": [normalized_name], + "modified": datetime.datetime.now().timestamp(), + "is_external": False, } params = {"entry_node": entry_node_data} diff --git a/backend/editor/controllers/project_controller.py b/backend/editor/controllers/project_controller.py index b8170356..794da84f 100644 --- a/backend/editor/controllers/project_controller.py +++ b/backend/editor/controllers/project_controller.py @@ -4,6 +4,10 @@ from .utils.result_utils import get_unique_record +def get_project_id(branch_name: str, taxonomy_name: str) -> str: + return "p_" + taxonomy_name + "_" + branch_name + + async def get_project(project_id: str) -> Project: """ Get project by id @@ -78,3 +82,42 @@ async def delete_project(project_id: str): params = {"project_id": project_id} await get_current_transaction().run(query, params) await delete_project_nodes(project_id) + + +async def clone_project(source_branch: str, taxonomy_name: str, target_branch: str): + """ + Clone a project using a new branch name + + Currently used for tests only. 
+ """ + source_id = get_project_id(source_branch, taxonomy_name) + target_id = get_project_id(target_branch, taxonomy_name) + # clone project node + query = """ + MATCH (p:PROJECT {id: $project_id}) + WITH p + CALL apoc.refactor.cloneNodes([p], true, ['id', 'branch'] ) + YIELD output as new_node + WITH new_node + SET new_node.created_at = datetime(), + new_node.branch_name = $target_branch, + new_node.id = $target_id + RETURN new_node + """ + params = { + "project_id": source_id, + "target_branch": target_branch, + "target_id": get_project_id(target_branch, taxonomy_name), + } + await get_current_transaction().run(query, params) + # clone nodes thanks to apoc.refactor.cloneSubgraph + query = f""" + MATCH (n:{source_id}) + WITH collect(n) AS source_nodes + CALL apoc.refactor.cloneSubgraph(source_nodes) + YIELD output as new_node + WITH new_node + REMOVE new_node:{source_id} + SET new_node:{target_id} + """ + await get_current_transaction().run(query) diff --git a/backend/editor/entries.py b/backend/editor/entries.py index 6946d593..39262b1b 100644 --- a/backend/editor/entries.py +++ b/backend/editor/entries.py @@ -3,10 +3,12 @@ """ import asyncio +import datetime import logging import shutil import tempfile import urllib.request # Sending requests +from typing import Optional from fastapi import BackgroundTasks, HTTPException, UploadFile @@ -14,11 +16,17 @@ from fastapi.concurrency import run_in_threadpool from openfoodfacts_taxonomy_parser import parser # Parser for taxonomies from openfoodfacts_taxonomy_parser import unparser # Unparser for taxonomies +from openfoodfacts_taxonomy_parser import patcher from openfoodfacts_taxonomy_parser import utils as parser_utils from . 
import settings, utils from .controllers.node_controller import create_entry_node, get_error_node -from .controllers.project_controller import create_project, edit_project, get_project +from .controllers.project_controller import ( + create_project, + edit_project, + get_project, + get_project_id, +) from .exceptions import GithubBranchExistsError # Custom exceptions from .exceptions import ( GithubUploadError, @@ -32,7 +40,7 @@ TransactionCtx, get_current_transaction, ) -from .models.node_models import EntryNode, EntryNodeCreate +from .models.node_models import EntryNode, EntryNodeCreate, NodeType from .models.project_models import ProjectCreate, ProjectEdit, ProjectStatus from .settings import EXTERNAL_TAXONOMIES @@ -49,23 +57,23 @@ class TaxonomyGraph: def __init__(self, branch_name, taxonomy_name): self.taxonomy_name = taxonomy_name self.branch_name = branch_name - self.project_name = "p_" + taxonomy_name + "_" + branch_name + self.project_name = get_project_id(branch_name, taxonomy_name) def taxonomy_path_in_repository(self, taxonomy_name): return utils.taxonomy_path_in_repository(taxonomy_name) - def get_label(self, id): + def get_label(self, id) -> NodeType: """ Helper function for getting the label for a given id """ if id.startswith("stopword"): - return "STOPWORDS" + return NodeType.STOPWORDS elif id.startswith("synonym"): - return "SYNONYMS" + return NodeType.SYNONYMS elif id.startswith("__header__") or id.startswith("__footer__"): - return "TEXT" + return NodeType.TEXT else: - return "ENTRY" + return NodeType.ENTRY async def create_entry_node(self, name, main_language_code) -> str: """ @@ -199,18 +207,22 @@ async def import_taxonomy( background_tasks.add_task(self.get_and_parse_taxonomy, uploadfile) return True - def dump_taxonomy(self, background_tasks: BackgroundTasks): + def dump_taxonomy( + self, + background_tasks: BackgroundTasks, + dump_cls: unparser.WriteTaxonomy = patcher.PatchTaxonomy, + ): """ Helper function to create the txt file of a 
taxonomy """ # Create unparser object and pass a sync session to it with SyncTransactionCtx() as session: - unparser_object = unparser.WriteTaxonomy(session) + dumper = dump_cls(session) # Creates a unique file for dumping the taxonomy filename = self.project_name + ".txt" try: # Dump taxonomy with given file name and branch name - unparser_object(filename, self.branch_name, self.taxonomy_name) + dumper(filename, self.branch_name, self.taxonomy_name) # Program file removal in the background background_tasks.add_task(utils.file_cleanup, filename) return filename @@ -340,9 +352,9 @@ async def list_projects(self, status=None): return [item async for result_list in query_result for item in result_list] - async def add_node_to_end(self, label, entry): + async def add_node_to_end(self, label: NodeType, entry): """ - Helper function which adds an existing node to end of taxonomy + Helper function which adds a newly created node to end of taxonomy """ # Delete relationship between current last node and __footer__ query = f""" @@ -357,8 +369,10 @@ async def add_node_to_end(self, label, entry): # Rebuild relationships by inserting incoming node at the end query = [] query = f""" - MATCH (new_node:{self.project_name}:{label}) WHERE new_node.id = $id - MATCH (last_node:{self.project_name}:{end_node_label}) WHERE last_node.id = $endnodeid + MATCH (new_node:{self.project_name}:{label.value}) + WHERE new_node.id = $id + MATCH (last_node:{self.project_name}:{end_node_label.value}) + WHERE last_node.id = $endnodeid MATCH (footer:{self.project_name}:TEXT) WHERE footer.id = "__footer__" CREATE (last_node)-[:is_before]->(new_node) CREATE (new_node)-[:is_before]->(footer) @@ -366,7 +380,7 @@ async def add_node_to_end(self, label, entry): await get_current_transaction().run(query, {"id": entry, "endnodeid": end_node["id"]}) # UNUSED FOR NOW - async def add_node_to_beginning(self, label, entry): + async def add_node_to_beginning(self, label: NodeType, entry): """ Helper function which 
adds an existing node to beginning of taxonomy """ @@ -382,8 +396,8 @@ async def add_node_to_beginning(self, label, entry): # Rebuild relationships by inserting incoming node at the beginning query = f""" - MATCH (new_node:{self.project_name}:{label}) WHERE new_node.id = $id - MATCH (first_node:{self.project_name}:{start_node_label}) + MATCH (new_node:{self.project_name}:{label.value}) WHERE new_node.id = $id + MATCH (first_node:{self.project_name}:{start_node_label.value}) WHERE first_node.id = $startnodeid MATCH (header:{self.project_name}:TEXT) WHERE header.id = "__header__" CREATE (new_node)-[:is_before]->(first_node) @@ -391,31 +405,70 @@ async def add_node_to_beginning(self, label, entry): """ await get_current_transaction().run(query, {"id": entry, "startnodeid": start_node["id"]}) - async def delete_node(self, label, entry): + async def delete_node(self, label: NodeType, entry): """ Helper function used for deleting a node with given id and label + + We don't really delete it because we have to keep track of modified nodes. + We set the entry type label to REMOVED_