From c225e28c582fafdbe4b6d0acad9b5fe80fccf2f7 Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Wed, 9 Jul 2025 22:08:14 +1000 Subject: [PATCH 01/14] feat: add macaron database extractor module Signed-off-by: Trong Nhan Mai --- src/macaron/build_spec_generator/__init__.py | 2 + .../macaron_db_extractor.py | 723 ++++++++++++++++++ src/macaron/errors.py | 16 + tests/build_spec_generator/__init__.py | 2 + .../test_macaron_db_extractor.py | 232 ++++++ 5 files changed, 975 insertions(+) create mode 100644 src/macaron/build_spec_generator/__init__.py create mode 100644 src/macaron/build_spec_generator/macaron_db_extractor.py create mode 100644 tests/build_spec_generator/__init__.py create mode 100644 tests/build_spec_generator/test_macaron_db_extractor.py diff --git a/src/macaron/build_spec_generator/__init__.py b/src/macaron/build_spec_generator/__init__.py new file mode 100644 index 000000000..8e17a3508 --- /dev/null +++ b/src/macaron/build_spec_generator/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. diff --git a/src/macaron/build_spec_generator/macaron_db_extractor.py b/src/macaron/build_spec_generator/macaron_db_extractor.py new file mode 100644 index 000000000..05a485b7d --- /dev/null +++ b/src/macaron/build_spec_generator/macaron_db_extractor.py @@ -0,0 +1,723 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
+ +"""This module contains the logic to extract build relation information for a PURL from the Macaron database.""" + +import json +import logging +from collections.abc import Sequence +from dataclasses import dataclass +from typing import TypeVar + +from packageurl import PackageURL +from sqlalchemy import Select, and_, select +from sqlalchemy.dialects import sqlite +from sqlalchemy.exc import MultipleResultsFound, SQLAlchemyError +from sqlalchemy.orm import Session, aliased + +from macaron.database.table_definitions import Analysis, CheckFacts, Component, MappedCheckResult, Repository +from macaron.errors import QueryMacaronDatabaseError +from macaron.slsa_analyzer.checks.build_as_code_check import BuildAsCodeFacts +from macaron.slsa_analyzer.checks.build_script_check import BuildScriptFacts +from macaron.slsa_analyzer.checks.build_service_check import BuildServiceFacts +from macaron.slsa_analyzer.checks.build_tool_check import BuildToolFacts + +logger: logging.Logger = logging.getLogger(__name__) + + +@dataclass +class GenericBuildCommandInfo: + """Contains the build command information extracted from build related check facts.""" + + command: list[str] + language: str + language_versions: list[str] + build_tool_name: str + + +T = TypeVar("T") + + +def lookup_multiple( + select_statement: Select[tuple[T]], + session: Session, +) -> Sequence[T]: + """Perform an SELECT statement and return all scalar results. + + Parameters + ---------- + select_statement : Select[tuple[T]] + The SQLAlchemy SELECT statement to execute. + session : Session + The SQLAlchemy session to the database we are querying from. + + Returns + ------- + Sequence[T] + The result of executing the SELECT statement as scalar values. + + Raises + ------ + QueryMacaronDatabaseError + If the SELECT statement isn't executed successfully. + For example, if the schema of the target database doesn't match the statement. 
+ """ + try: + sql_results = session.execute(select_statement) + except SQLAlchemyError as generic_exec_error: + raise QueryMacaronDatabaseError( + f"Critical: unexpected error when execute query {compile_sqlite_select_statement(select_statement)}." + ) from generic_exec_error + + return sql_results.scalars().all() + + +def lookup_one_or_none( + select_statement: Select[tuple[T]], + session: Session, +) -> T | None: + """Perform an SELECT statement and return at most one scalar result. + + Parameters + ---------- + select_statement : Select[tuple[T]] + The SQLAlchemy SELECT statement to execute + session : Session + The SQLAlchemy session to the database we are querying from. + + Returns + ------- + T | None + The result of executing the SELECT statement as one scalar value or None + if there isn't any available. + + Raises + ------ + QueryMacaronDatabaseError + If the SELECT statement isn't executed successfully. + For example, if the schema of the target database doesn't match the statement. + Of if there are more than one result obtained from the SELECT statement. + """ + compiled_select_statement = compile_sqlite_select_statement(select_statement) + try: + query_scalar_results = session.execute(select_statement).scalars() + except SQLAlchemyError as generic_exec_error: + raise QueryMacaronDatabaseError( + f"Critical: unexpected error when execute query {compiled_select_statement}." + ) from generic_exec_error + + try: + result = query_scalar_results.one_or_none() + except MultipleResultsFound as error: + raise QueryMacaronDatabaseError( + f"Expect at most one result, found multiple results for query {compiled_select_statement}." + ) from error + + return result + + +def compile_sqlite_select_statement(select_statment: Select) -> str: + """Return the equivalent SQLite SELECT statement from an SQLAlchemy SELECT statement. + + This function also introduces additional cosmetic details so that it can be easily + read from the log. 
+ + Parameters + ---------- + select_statement : Select + The SQLAlchemy Select statement. + + Returns + ------- + str + The equivalent SQLite SELECT statement as a string. + """ + compiled_sqlite = select_statment.compile( + dialect=sqlite.dialect(), # type: ignore + compile_kwargs={"literal_binds": True}, + ) + return f"\n----- Begin SQLite query \n{str(compiled_sqlite)}\n----- End SQLite query\n" + + +def get_sql_stmt_latest_component_for_purl(purl: PackageURL) -> Select[tuple[Component]]: + """Return an SQLAlchemy SELECT statement to query the latest Component. + + Parameters + ---------- + purl : PackageURL + The PackageURL object to find the Component instance. + + Returns + ------- + Select[tuple[Component]] + The SQLAlchemy SELECT statement to query the latest analyzed Component instance + corresponding to the PackageURL. + """ + return ( + select( + Component, + ) + .select_from(Component) + .join( + Analysis, + onclause=Component.id == Analysis.id, + ) + .where(Component.purl == purl.to_string()) + .order_by( + Analysis.analysis_time.desc(), + Analysis.id.desc(), + ) + ) + + +def get_sql_stmt_build_tools(component_id: int) -> Select[tuple[BuildToolFacts]]: + """Return an SQLAlchemy SELECT statement to query the BuildToolFacts for a given PackageURL. + + Parameters + ---------- + purl_string : str + The PackageURL string to find the BuildToolFacts. + + Returns + ------- + Select[tuple[BuildAsCodeFacts]] + The SQLAlchemy SELECT statement. + """ + # Because BuildToolFacts inherit from CheckFacts, SQLAlchemy had to perform implicit alias + # when performing a join between them. This pattern is not recommended, hence a warning is raised + # https://docs.sqlalchemy.org/en/20/errors.html#an-alias-is-being-generated-automatically-due-to-overlapping-tables. + # To resolve this, we need to create an SQLAlchemy alias and use it in the SELECT statement. 
+ build_tool_facts_alias = aliased(BuildToolFacts, flat=True) + + return ( + select(build_tool_facts_alias) + .select_from(Component) + .join( + MappedCheckResult, + onclause=Component.id == MappedCheckResult.component_id, + ) + .join( + CheckFacts, + onclause=MappedCheckResult.id == CheckFacts.check_result_id, + ) + .join( + build_tool_facts_alias, + onclause=CheckFacts.id == build_tool_facts_alias.id, + ) + .where(Component.id == component_id) + .order_by( + build_tool_facts_alias.confidence.desc(), + build_tool_facts_alias.id.asc(), + ) + ) + + +def get_sql_stmt_build_as_code_check(component_id: int) -> Select[tuple[BuildAsCodeFacts]]: + """Return an SQLAlchemy SELECT statement to query the BuildAsCodeFacts for a given PackageURL. + + Parameters + ---------- + purl_string : str + The PackageURL string to find the BuildToolFacts. + + Returns + ------- + Select[tuple[BuildAsCodeFacts]] + The SQLAlchemy SELECT statement. + """ + # Because BuildAsCodeFacts inherit from CheckFacts, SQLAlchemy had to perform implicit alias + # when performing a join between them. This pattern is not recommended, hence a warning is raised + # https://docs.sqlalchemy.org/en/20/errors.html#an-alias-is-being-generated-automatically-due-to-overlapping-tables. + # To resolve this, we need to create an SQLAlchemy alias and use it in the SELECT statement. 
+ build_as_code_facts_alias = aliased(BuildAsCodeFacts, flat=True) + + return ( + select(build_as_code_facts_alias) + .select_from(Component) + .join( + MappedCheckResult, + onclause=MappedCheckResult.id == Component.id, + ) + .join( + CheckFacts, + onclause=MappedCheckResult.id == CheckFacts.id, + ) + .join( + build_as_code_facts_alias, + onclause=CheckFacts.id == build_as_code_facts_alias.id, + ) + .where( + and_( + Component.id == component_id, + build_as_code_facts_alias.deploy_command.is_not(None), + ) + ) + .order_by( + build_as_code_facts_alias.confidence.desc(), + build_as_code_facts_alias.id.asc(), + ) + ) + + +def get_sql_stmt_build_service_check(component_id: int) -> Select[tuple[BuildServiceFacts]]: + """Return an SQLAlchemy SELECT statement to query the BuildServiceFacts for a given PackageURL. + + Parameters + ---------- + purl_string : str + The PackageURL string to find the BuildServiceFacts. + + Returns + ------- + Select[tuple[BuildServiceFacts]] + The SQLAlchemy SELECT statement. + """ + # Because BuildServiceFacts inherit from CheckFacts, SQLAlchemy had to perform implicit alias + # when performing a join between them. This pattern is not recommended, hence a warning is raised + # https://docs.sqlalchemy.org/en/20/errors.html#an-alias-is-being-generated-automatically-due-to-overlapping-tables. + # To resolve this, we need to create an SQLAlchemy alias and use it in the SELECT statement. 
+ build_service_facts_alias = aliased(BuildServiceFacts, flat=True) + + return ( + select(build_service_facts_alias) + .select_from(Component) + .join( + MappedCheckResult, + onclause=MappedCheckResult.component_id == Component.id, + ) + .join( + CheckFacts, + onclause=MappedCheckResult.id == CheckFacts.id, + ) + .join( + build_service_facts_alias, + onclause=CheckFacts.id == build_service_facts_alias.id, + ) + .where( + and_( + Component.id == component_id, + build_service_facts_alias.build_command.is_not(None), + ) + ) + .order_by( + build_service_facts_alias.confidence.desc(), + build_service_facts_alias.id.asc(), + ) + ) + + +def get_sql_stmt_build_script_check(component_id: int) -> Select[tuple[BuildScriptFacts]]: + """Return an SQLAlchemy SELECT statement to query the BuildScriptFacts for a given PackageURL. + + Parameters + ---------- + purl_string : str + The PackageURL string to find the BuildScriptFacts. + + Returns + ------- + Select[tuple[BuildScriptFacts]] + The SQLAlchemy SELECT statement. + """ + # Because BuildScriptFacts inherit from CheckFacts, SQLAlchemy had to perform implicit alias + # when performing a join between them. This pattern is not recommended, hence a warning is raised + # https://docs.sqlalchemy.org/en/20/errors.html#an-alias-is-being-generated-automatically-due-to-overlapping-tables. + # To resolve this, we need to create an SQLAlchemy alias and use it in the SELECT statement. 
+ build_script_facts_alias = aliased(BuildScriptFacts, flat=True) + + return ( + select(build_script_facts_alias) + .select_from(Component) + .join( + MappedCheckResult, + onclause=Component.id == MappedCheckResult.component_id, + ) + .join( + CheckFacts, + onclause=MappedCheckResult.id == CheckFacts.id, + ) + .join( + build_script_facts_alias, + onclause=CheckFacts.id == build_script_facts_alias.id, + ) + .where( + and_( + Component.id == component_id, + build_script_facts_alias.build_tool_command.is_not(None), + ) + ) + .order_by( + build_script_facts_alias.confidence.desc(), + build_script_facts_alias.id.asc(), + ) + ) + + +def get_sql_stmt_repository(component_id: int) -> Select[tuple[Repository]]: + """Return an SQLAlchemy SELECT statement to query the Repository for a given PackageURL. + + Parameters + ---------- + purl_string : str + The PackageURL string to find the Repository. + + Returns + ------- + Select[tuple[Repository]] + The SQLAlchemy SELECT statement. + """ + return ( + select(Repository) + .select_from(Component) + .join( + Repository, + onclause=Component.id == Repository.component_id, + ) + .where(Component.id == component_id) + ) + + +def lookup_latest_component_id(purl: PackageURL, session: Session) -> int | None: + """Return the component id of the latest analysis that matches a given PackageURL string. + + Parameters + ---------- + purl : PackageURL + The PackageURL object to look for the latest component id. + session : Session + The SQLAlcemy Session that connects to the Macaron database. + + Returns + ------- + int | None + The latest component id or None if there isn't one available in the database. + + Raises + ------ + QueryMacaronDatabaseError + If there is an unexpected error when executing the SQLAlchemy query. 
+ """ + latest_component_id_stmt = get_sql_stmt_latest_component_for_purl(purl) + logger.debug("Latest Analysis and Component query \n %s", compile_sqlite_select_statement(latest_component_id_stmt)) + + try: + component_results = session.execute(latest_component_id_stmt) + except SQLAlchemyError as generic_exec_error: + raise QueryMacaronDatabaseError( + f"Critical: unexpected error when execute query {compile_sqlite_select_statement(latest_component_id_stmt)}." + ) from generic_exec_error + + latest_component = component_results.scalars().first() + if not latest_component: + return None + + return latest_component.id + + +def lookup_build_tools_check(component_id: int, session: Session) -> Sequence[BuildToolFacts]: + """Return the sequence of BuildToolFacts instances for given PackageURL string. + + Parameters + ---------- + purl_string : str + The PackageURL string to look for the BuildToolFacts. + session : Session + The SQLAlcemy Session that connects to the Macaron database. + + Returns + ------- + Sequence[BuildToolFacts] + The sequence of BuildToolFacts instances obtained from querying the database. + + Raises + ------ + QueryMacaronDatabaseError + If there is an unexpected error when executing the SQLAlchemy query. + """ + build_tools_statement = get_sql_stmt_build_tools(component_id) + logger.debug( + "Build Tools Check Facts for component %d \n %s", + component_id, + compile_sqlite_select_statement(build_tools_statement), + ) + + build_tool_facts = lookup_multiple( + select_statement=build_tools_statement, + session=session, + ) + + return build_tool_facts + + +def lookup_build_as_code_check(component_id: int, session: Session) -> Sequence[BuildAsCodeFacts]: + """Return the sequence of BuildAsCodeFacts instances for given PackageURL string. + + Parameters + ---------- + purl_string : str + The PackageURL string to look for the BuildAsCodeFacts. + session : Session + The SQLAlcemy Session that connects to the Macaron database. 
+ + Returns + ------- + Sequence[BuildAsCodeFacts] + The sequence of BuildAsCodeFacts instances obtained from querying the database. + + Raises + ------ + QueryMacaronDatabaseError + If there is an unexpected error when executing the SQLAlchemy query. + """ + build_as_code_select_statement = get_sql_stmt_build_as_code_check(component_id) + logger.debug( + "Build As Code Check Fact for component %d \n %s", + component_id, + compile_sqlite_select_statement(build_as_code_select_statement), + ) + + build_as_code_check_facts = lookup_multiple( + select_statement=build_as_code_select_statement, + session=session, + ) + + return build_as_code_check_facts + + +def lookup_build_service_check(component_id: int, session: Session) -> Sequence[BuildServiceFacts]: + """Return the sequence of BuildServiceFacts instances for given PackageURL string. + + Parameters + ---------- + purl_string : str + The PackageURL string to look for the BuildServiceFacts. + session : Session + The SQLAlcemy Session that connects to the Macaron database. + + Returns + ------- + Sequence[BuildServiceFacts] + The sequence of BuildServiceFacts instances obtained from querying the database. + + Raises + ------ + QueryMacaronDatabaseError + If there is an unexpected error when executing the SQLAlchemy query. + """ + build_service_select_statement = get_sql_stmt_build_service_check(component_id) + logger.debug( + "Build Service Check Fact for component %d \n %s", + component_id, + compile_sqlite_select_statement(build_service_select_statement), + ) + + build_service_check_facts = lookup_multiple( + select_statement=build_service_select_statement, + session=session, + ) + + return build_service_check_facts + + +def lookup_build_script_check(component_id: int, session: Session) -> Sequence[BuildScriptFacts]: + """Return the sequence of BuildScriptFacts instances for given PackageURL string. + + Parameters + ---------- + purl_string : str + The PackageURL string to look for the BuildScriptFacts. 
+ session : Session + The SQLAlcemy Session that connects to the Macaron database. + + Returns + ------- + Sequence[BuildScriptFacts] + The sequence of BuildScriptFacts instances obtained from querying the database. + + Raises + ------ + QueryMacaronDatabaseError + If there is an unexpected error when executing the SQLAlchemy query. + """ + build_script_select_statment = get_sql_stmt_build_script_check(component_id) + logger.debug( + "Build Script Check Fact for component %d \n %s", + component_id, + compile_sqlite_select_statement(build_script_select_statment), + ) + + build_script_check_facts = lookup_multiple( + select_statement=build_script_select_statment, + session=session, + ) + + return build_script_check_facts + + +def extract_generic_build_command_info( + check_facts: Sequence[BuildAsCodeFacts] | Sequence[BuildServiceFacts] | Sequence[BuildScriptFacts], +) -> list[GenericBuildCommandInfo]: + """Return the list of GenericBuildCommandInfo instances from a list of Build related Check Facts. + + The following information are captured for each Check Facts + + - ``command``: the build command, but this information is located in different attribute depending on the + type of Build Check Fact (e.g. in `BuildAsCodeFacts` it is stored in `deploy_command`). It's stored + in the database as a serialized JSON object so we need to use json.loads to turn it into a list of strings. + + - ``language`` and ``build_tool_name`` are attributes of all Build Check Fact instances + + - ``language_versions`` is an attribute of all Build Check Fact instances. It's stored + in the database as a serialized JSON object so we need to use json.loads to turn it into a list of strings. + + Parameters + ---------- + check_facts : Sequence[BuildAsCodeFacts] | Sequence[BuildServiceFacts] | Sequence[BuildScriptFacts] + The sequence of check facts obtained from the database. 
+ + Returns + ------- + list[GenericBuildCommandInfo] + The list of GenericBuildCommandInfo instances that store build command information + representing by the Build Check Facts. + + Raises + ------ + json.decoder.JSONDecodeError + If we failed to decode the JSON-serialized values stored in the Build*Facts instances. + """ + result = [] + for fact in check_facts: + match fact: + case BuildAsCodeFacts(): + result.append( + GenericBuildCommandInfo( + command=json.loads(fact.deploy_command), + language=fact.language, + language_versions=json.loads(fact.language_versions) if fact.language_versions else [], + build_tool_name=fact.build_tool_name, + ) + ) + case BuildServiceFacts(): + result.append( + GenericBuildCommandInfo( + command=json.loads(fact.build_command), + language=fact.language, + language_versions=json.loads(fact.language_versions) if fact.language_versions else [], + build_tool_name=fact.build_tool_name, + ) + ) + case BuildScriptFacts(): + result.append( + GenericBuildCommandInfo( + command=json.loads(fact.build_tool_command), + language=fact.language, + language_versions=json.loads(fact.language_versions) if fact.language_versions else [], + build_tool_name=fact.build_tool_name, + ) + ) + + return result + + +def lookup_any_build_command(component_id: int, session: Session) -> list[GenericBuildCommandInfo]: + """Return a list of ``GenericBuildCommandInfo`` instances from looking up any available build command. + + We will look for available build command from build-related check facts. + + Parameters + ---------- + component_id: int + The component id to lookup the build command. + session: Session + The SQLAlchemy session to the database for the lookup. + + Returns + ------- + list[GenericBuildCommandInfo] + This list will be empty if there is no available build command for this component. + + Raises + ------ + QueryMacaronDatabaseError + If there is an unexpected error when executing the SQLAlchemy query for looking up the build commands. 
+ Raised by "lookup_*_check" functions + """ + build_as_code_check_facts = lookup_build_as_code_check( + component_id=component_id, + session=session, + ) + if build_as_code_check_facts: + try: + return extract_generic_build_command_info(build_as_code_check_facts) + except json.decoder.JSONDecodeError as error: + logger.debug( + "Failed to extract generic build command info for build as code check facts for component id %s. " + + "Error %s. Continue", + component_id, + error, + ) + + build_service_check_facts = lookup_build_service_check( + component_id=component_id, + session=session, + ) + if build_service_check_facts: + try: + return extract_generic_build_command_info(build_service_check_facts) + except json.decoder.JSONDecodeError as error: + logger.debug( + "Failed to extract generic build command info for build servoce check facts for component id %s. " + + "Error %s. Continue", + component_id, + error, + ) + + build_script_check_facts = lookup_build_script_check( + component_id=component_id, + session=session, + ) + try: + return extract_generic_build_command_info(build_script_check_facts) + except json.decoder.JSONDecodeError as error: + logger.debug( + "Failed to extract generic build command info for build as code check facts for component id %s. " + + "Error %s. Continue", + component_id, + error, + ) + return [] + + +def lookup_repository(component_id: int, session: Session) -> Repository | None: + """Return the Repository instance for given PackageURL string. + + Parameters + ---------- + component_id : int + The component id to look for the Repository. + session : Session + The SQLAlcemy Session that connects to the Macaron database. + + Returns + ------- + Repository + The Repository instances obtained from querying the database. + + Raises + ------ + QueryMacaronDatabaseError + If the query result from the database contains more than one Repository instance, + or there is an unexpected error when executing the SQLAlchemy query. 
+ """ + repository_select_statement = get_sql_stmt_repository(component_id) + logger.debug( + "Repository for component %d \n %s.", component_id, compile_sqlite_select_statement(repository_select_statement) + ) + + repository_result = lookup_one_or_none( + select_statement=repository_select_statement, + session=session, + ) + + return repository_result diff --git a/src/macaron/errors.py b/src/macaron/errors.py index d5983a0bc..91ce63990 100644 --- a/src/macaron/errors.py +++ b/src/macaron/errors.py @@ -113,3 +113,19 @@ class LocalArtifactFinderError(MacaronError): class SourceCodeError(MacaronError): """Error for operations on package source code.""" + + +class CommandLineParseError(Exception): + """Raised if an error is encountered while parsing a CLI Command.""" + + +class PatchBuildCommandError(Exception): + """Raised if an error is encountered while patching a Maven CLI Command.""" + + +class QueryMacaronDatabaseError(Exception): + """Happens when there is an unexpected error while querying the database using SQLAlchemy.""" + + +class GenerateBuildSpecError(Exception): + """Happens when there is an unexpected error while generate the build spec file.""" diff --git a/tests/build_spec_generator/__init__.py b/tests/build_spec_generator/__init__.py new file mode 100644 index 000000000..8e17a3508 --- /dev/null +++ b/tests/build_spec_generator/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. diff --git a/tests/build_spec_generator/test_macaron_db_extractor.py b/tests/build_spec_generator/test_macaron_db_extractor.py new file mode 100644 index 000000000..1c7f3c4bd --- /dev/null +++ b/tests/build_spec_generator/test_macaron_db_extractor.py @@ -0,0 +1,232 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. 
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""This module contains tests for the macaron_db_extractor module.""" + +from collections.abc import Generator +from datetime import datetime, timezone +from typing import Any + +import pytest +from packageurl import PackageURL +from sqlalchemy import create_engine +from sqlalchemy.orm import Session, sessionmaker + +from macaron import __version__ +from macaron.build_spec_generator.macaron_db_extractor import ( + QueryMacaronDatabaseError, + Repository, + lookup_any_build_command, + lookup_build_tools_check, + lookup_latest_component_id, + lookup_repository, +) +from macaron.database.table_definitions import Analysis, CommitFinderInfo, Component, ORMBase, RepoFinderMetadata +from macaron.repo_finder.repo_finder import RepoFinderInfo + +# pylint: disable=redefined-outer-name + + +@pytest.fixture() +def macaron_db_session() -> Generator[Session, Any, None]: + """Return a session to a memory stored SQLite database with Macaron's database schema. + + The database is empty. This fixture's scope is function to prevent polluting between tests. + It also handles closing the session after the test function finishes. + """ + engine = create_engine("sqlite:///:memory:") + ORMBase.metadata.create_all(engine) + + session_maker = sessionmaker(engine) + session = session_maker() + + yield session + + session.close() + + +@pytest.fixture() +def invalid_db_session() -> Generator[Session, Any, None]: + """Return a session to a memory stored SQLite database. + + This databaes doesn't have Macaron database schema, hence it considered invalid. 
+ """ + engine = create_engine("sqlite:///:memory:") + + session_maker = sessionmaker(engine) + session = session_maker() + + yield session + + session.close() + + +@pytest.mark.parametrize( + ("input_data", "query_purl_string", "expect_result"), + [ + pytest.param( + [], + "pkg:maven/oracle/macaron@0.16.0", + None, + id="The database is empty.", + ), + pytest.param( + [ + ( + datetime(year=2025, month=5, day=6, hour=10, minute=30, second=30, tzinfo=timezone.utc), + "pkg:maven/boo/foo@0.2.0", + ), + ( + datetime(year=2025, month=5, day=6, hour=10, minute=30, second=30, tzinfo=timezone.utc), + "pkg:maven/boo/boohoo@1.0", + ), + ], + "pkg:maven/oracle/macaron@0.16.0", + None, + id="The database is not empty, but no component matches the query PackageURL string.", + ), + pytest.param( + [ + ( + datetime(year=2025, month=5, day=6, hour=10, minute=30, second=30, tzinfo=timezone.utc), + "pkg:maven/oracle/macaron@0.16.0", + ), + ( + datetime(year=2025, month=5, day=6, hour=10, minute=30, second=30, tzinfo=timezone.utc), + "pkg:maven/boo/foo@0.1.0", + ), + ( + datetime(year=2025, month=5, day=6, hour=10, minute=30, second=30, tzinfo=timezone.utc), + "pkg:maven/oracle/macaron@0.16.0", + ), + ], + "pkg:maven/oracle/macaron@0.16.0", + 3, + id="When two analyses of the same PURL has the same timestamp, the component id of the latest analysis is returned.", + ), + ], +) +def test_lookup_latest_component_id( + macaron_db_session: Session, + input_data: list[tuple[datetime, str]], + query_purl_string: str, + expect_result: int | None, +) -> None: + """Test the lookup_latest_component_id function.""" + for utc_timestamp, purl_string in input_data: + analysis = Analysis( + analysis_time=utc_timestamp, + macaron_version=__version__, + ) + + repo_finder_metadata = RepoFinderMetadata( + repo_finder_outcome=RepoFinderInfo.NOT_USED, + commit_finder_outcome=CommitFinderInfo.NOT_USED, + found_url="", + found_commit="", + ) + + _ = Component( + purl=purl_string, + analysis=analysis, + 
repository=None, + repo_finder_metadata=repo_finder_metadata, + ) + + macaron_db_session.add(analysis) + + macaron_db_session.commit() + assert lookup_latest_component_id(PackageURL.from_string(query_purl_string), macaron_db_session) == expect_result + + +def test_lookup_repository_empty_db(macaron_db_session: Session) -> None: + """Test the lookup_repository function.""" + assert not lookup_repository(1, macaron_db_session) + + +def test_lookup_repository(macaron_db_session: Session) -> None: + """Test the lookup_repository function.""" + analysis = Analysis( + analysis_time=datetime(year=2025, month=5, day=6, hour=10, minute=30, second=30, tzinfo=timezone.utc), + macaron_version=__version__, + ) + + repository = Repository( + full_name="oracle/macaron", + complete_name="github.com/oracle/macaron", + remote_path="https://github.com/oracle/macaron", + branch_name="main", + commit_sha="d2b95262091d6572cc12dcda57d89f9cd44ac88b", + commit_date="2023-02-10T15:11:14+08:00", + fs_path="/boo/foo/macaron", + files=["boo.txt", "foo.xml"], + ) + + repo_finder_metadata_1 = RepoFinderMetadata( + repo_finder_outcome=RepoFinderInfo.NOT_USED, + commit_finder_outcome=CommitFinderInfo.NOT_USED, + found_url="", + found_commit="", + ) + + repo_finder_metadata_2 = RepoFinderMetadata( + repo_finder_outcome=RepoFinderInfo.NOT_USED, + commit_finder_outcome=CommitFinderInfo.NOT_USED, + found_url="", + found_commit="", + ) + + component_without_repo = Component( + purl="pkg:maven/boo/foo@0.1.0", + analysis=analysis, + repository=None, + repo_finder_metadata=repo_finder_metadata_1, + ) + + component_with_repo = Component( + purl="pkg:maven/oracle/macaron@0.16.0", + analysis=analysis, + repository=repository, + repo_finder_metadata=repo_finder_metadata_2, + ) + + macaron_db_session.add(analysis) + macaron_db_session.commit() + + assert not lookup_repository(component_without_repo.id, macaron_db_session) + lookup_repo = lookup_repository(component_with_repo.id, macaron_db_session) + assert 
lookup_repo + assert lookup_repo.remote_path == "https://github.com/oracle/macaron" + assert lookup_repo.commit_sha == "d2b95262091d6572cc12dcda57d89f9cd44ac88b" + + +def test_lookup_any_build_command_empty_db(macaron_db_session: Session) -> None: + """Test the lookup_any_build_command function with an empty database.""" + assert not lookup_any_build_command(component_id=1, session=macaron_db_session) + + +def test_invalid_input_databse(invalid_db_session: Session) -> None: + """Test handling invalid input database.""" + with pytest.raises(QueryMacaronDatabaseError): + lookup_any_build_command( + component_id=1, + session=invalid_db_session, + ) + + with pytest.raises(QueryMacaronDatabaseError): + lookup_build_tools_check( + component_id=1, + session=invalid_db_session, + ) + + with pytest.raises(QueryMacaronDatabaseError): + lookup_repository( + component_id=1, + session=invalid_db_session, + ) + + with pytest.raises(QueryMacaronDatabaseError): + lookup_latest_component_id( + purl=PackageURL.from_string("pkg:maven/oracle/macaron@0.16.0"), + session=invalid_db_session, + ) From 95432bd5fb26a8e8cc992b0f00da7a1f1d38589a Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Wed, 9 Jul 2025 22:12:35 +1000 Subject: [PATCH 02/14] feat: add maven and gradle cli parsers Signed-off-by: Trong Nhan Mai --- .../cli_command_parser/__init__.py | 165 +++++ .../cli_command_parser/gradle_cli_command.py | 388 ++++++++++ .../cli_command_parser/gradle_cli_parser.py | 701 ++++++++++++++++++ .../cli_command_parser/maven_cli_command.py | 324 ++++++++ .../cli_command_parser/maven_cli_parser.py | 594 +++++++++++++++ .../unparsed_cli_command.py | 20 + .../test_base_cli_options.py | 129 ++++ .../test_gradle_cli_command.py | 156 ++++ .../test_gradle_cli_parser.py | 165 +++++ .../test_maven_cli_command.py | 142 ++++ .../test_maven_cli_parser.py | 197 +++++ tests/conftest.py | 14 + 12 files changed, 2995 insertions(+) create mode 100644 
src/macaron/build_spec_generator/cli_command_parser/__init__.py create mode 100644 src/macaron/build_spec_generator/cli_command_parser/gradle_cli_command.py create mode 100644 src/macaron/build_spec_generator/cli_command_parser/gradle_cli_parser.py create mode 100644 src/macaron/build_spec_generator/cli_command_parser/maven_cli_command.py create mode 100644 src/macaron/build_spec_generator/cli_command_parser/maven_cli_parser.py create mode 100644 src/macaron/build_spec_generator/cli_command_parser/unparsed_cli_command.py create mode 100644 tests/build_spec_generator/cli_command_parser/test_base_cli_options.py create mode 100644 tests/build_spec_generator/cli_command_parser/test_gradle_cli_command.py create mode 100644 tests/build_spec_generator/cli_command_parser/test_gradle_cli_parser.py create mode 100644 tests/build_spec_generator/cli_command_parser/test_maven_cli_command.py create mode 100644 tests/build_spec_generator/cli_command_parser/test_maven_cli_parser.py diff --git a/src/macaron/build_spec_generator/cli_command_parser/__init__.py b/src/macaron/build_spec_generator/cli_command_parser/__init__.py new file mode 100644 index 000000000..8801ea55a --- /dev/null +++ b/src/macaron/build_spec_generator/cli_command_parser/__init__.py @@ -0,0 +1,165 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
+ +"""This module contain the base classes cli command parsers related.""" + +import argparse +from abc import abstractmethod +from collections.abc import Mapping +from dataclasses import dataclass +from enum import Enum +from typing import Any, Generic, Protocol, TypeGuard, TypeVar + + +def is_list_of_strs(value: Any) -> TypeGuard[list[str]]: + """Type guard for a list of strings.""" + return isinstance(value, list) and all(isinstance(ele, str) for ele in value) + + +def is_dict_of_str_to_str_or_none(value: Any) -> TypeGuard[dict[str, str | None]]: + """Type guard for a dictionary with keys are string and values are strings or None.""" + if not isinstance(value, dict): + return False + + for key, val in value.items(): + if not isinstance(key, str): + return False + + if not (val is None or isinstance(val, str)): + return False + + return True + + +def patch_mapping( + original: Mapping[str, str], + patch: Mapping[str, str | None], +) -> dict[str, str]: + """Patch a mapping. + + A key with value in patch set to None will be removed from the original. + + Parameters + ---------- + original: Mapping[str, str] + The original mapping. + patch: Mapping[str, str | None] + The patch. + + Returns + ------- + dict[str, str]: + The new dictionary after applying the patch. + """ + patch_result = dict(original) + + for name, value in patch.items(): + if value is None: + patch_result.pop(name, None) + else: + patch_result[name] = value + + return patch_result + + +P = TypeVar("P") + + +@dataclass +class OptionDef(Generic[P]): + """This class represent a definition of a CLI option for argparse.ArgumentParser. + + This class also contains the information for validating a patch value. + The generic type T is the patch expected type (if it's not None). + """ + + # e.g. `--long-option-name` + # We always require the long name as we use it as the unique identifier in the parser. 
+ long_name: str + + @abstractmethod + def is_valid_patch_option(self, patch: Any) -> TypeGuard[P]: + """Return True if the provide patch value is compatible with the internal type of this option.""" + raise NotImplementedError() + + @abstractmethod + def add_itself_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: + """Add a new argument to argparser.ArgumentParser representing this option.""" + raise NotImplementedError() + + @abstractmethod + def get_patch_type_str(self) -> str: + """Return the expected type for the patch value as string.""" + raise NotImplementedError() + + +class PatchCommandBuildTool(str, Enum): + """Build tool supported for CLICommand patching.""" + + MAVEN = "maven" + GRADLE = "gradle" + + +class CLIOptions(Protocol): + """Interface of the options part of a CLICommand.""" + + def to_option_cmds(self) -> list[str]: + """Return the options as a list of strings.""" + + +class CLICommand(Protocol): + """Interface of a CLI Command.""" + + def to_cmds(self) -> list[str]: + """Return the CLI Command as a list of strings.""" + + +T = TypeVar("T", bound="CLICommand") +Y_contra = TypeVar("Y_contra", contravariant=True) + + +class CLICommandParser(Protocol[T, Y_contra]): + """Interface of a CLI Command Parser.""" + + @property + def build_tool(self) -> PatchCommandBuildTool: + """Return the ``BuildTool`` enum corresponding to this CLICommand.""" + + def parse(self, cmd_list: list[str]) -> CLICommand: + """Parse the CLI Command. + + Parameters + ---------- + cmd_list: list[str] + The CLI Command as list of strings. + + Returns + ------- + CLICommand + The CLICommand instance. + + Raises + ------ + CommandLineParseError + If an error happens when parsing the CLI Command. + """ + + def is_build_tool(self, executable_path: str) -> bool: + """Return True if ``executable_path`` ends the accepted executable for this build tool. + + Parameters + ---------- + executable_path: str + The executable component of a CLI command. 
+ + Returns + ------- + bool + """ + + def apply_patch( + self, + cli_command: T, + options_patch: Mapping[str, Y_contra | None], + ) -> T: + """Return the a new CLICommand object with its option patched, while persisting the executable path.""" diff --git a/src/macaron/build_spec_generator/cli_command_parser/gradle_cli_command.py b/src/macaron/build_spec_generator/cli_command_parser/gradle_cli_command.py new file mode 100644 index 000000000..48d0000fc --- /dev/null +++ b/src/macaron/build_spec_generator/cli_command_parser/gradle_cli_command.py @@ -0,0 +1,388 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""This module contains the classes that represent components of a Gradle CLI Command.""" + +import argparse +from dataclasses import dataclass + + +@dataclass +class GradleCLIOptions: + """The class that stores the values of options parsed from a Gradle CLI Command.""" + + # Optional flags with a different attribute name. + continue_: bool | None + help_: bool | None + + # Optional flags. + no_rebuild: bool | None + debug: bool | None + export_keys: bool | None + foreground: bool | None + info: bool | None + offline: bool | None + profile: bool | None + quiet: bool | None + refresh_dependencies: bool | None + refresh_keys: bool | None + rerun_tasks: bool | None + full_stacktrace: bool | None + stacktrace: bool | None + status: bool | None + stop: bool | None + continuous: bool | None + version: bool | None + warn: bool | None + write_locks: bool | None + build_cache: bool | None + configuration_cache: bool | None + configure_on_demand: bool | None + daemon: bool | None + parallel: bool | None + scan: bool | None + watch_fs: bool | None + + # Single value options. 
+ build_file: str | None + settings_file: str | None + configuration_cache_problems: str | None + gradle_user_home: str | None + init_script: str | None + include_build: str | None + write_verification_metadata: str | None + max_workers: str | None + project_dir: str | None + priority: str | None + project_cache_dir: str | None + update_locks: str | None + warning_mode: str | None + + # Appended list option. + exclude_task: list[str] | None + + # Property definition options. + system_prop: dict[str, str] | None + project_prop: dict[str, str] | None + + # Gradle tasks. + tasks: list[str] | None + + @classmethod + def from_parsed_arg( + cls, + parsed_arg: argparse.Namespace, + ) -> "GradleCLIOptions": + """Initialize the instance from an argparse.Namespace object. + + Parameters + ---------- + parsed_arg : argparse.Namespace + The argparse.Namespace object obtained from parsing the CLI Command. + + Returns + ------- + GradleCLIOptions + The intialized GradleCLIOptions object instance. + """ + return cls( + help_=parsed_arg.help_, + no_rebuild=parsed_arg.no_rebuild, + continue_=parsed_arg.continue_, + debug=parsed_arg.debug, + export_keys=parsed_arg.export_keys, + foreground=parsed_arg.foreground, + info=parsed_arg.info, + offline=parsed_arg.offline, + profile=parsed_arg.profile, + quiet=parsed_arg.quiet, + refresh_dependencies=parsed_arg.refresh_dependencies, + refresh_keys=parsed_arg.refresh_keys, + rerun_tasks=parsed_arg.rerun_tasks, + full_stacktrace=parsed_arg.full_stacktrace, + stacktrace=parsed_arg.stacktrace, + status=parsed_arg.status, + stop=parsed_arg.stop, + continuous=parsed_arg.continuous, + version=parsed_arg.version, + warn=parsed_arg.warn, + write_locks=parsed_arg.write_locks, + build_cache=parsed_arg.build_cache, + configuration_cache=parsed_arg.configuration_cache, + configure_on_demand=parsed_arg.configure_on_demand, + daemon=parsed_arg.daemon, + parallel=parsed_arg.parallel, + scan=parsed_arg.scan, + watch_fs=parsed_arg.watch_fs, + 
build_file=parsed_arg.build_file, + settings_file=parsed_arg.settings_file, + configuration_cache_problems=parsed_arg.configuration_cache_problems, + gradle_user_home=parsed_arg.gradle_user_home, + init_script=parsed_arg.init_script, + include_build=parsed_arg.include_build, + write_verification_metadata=parsed_arg.write_verification_metadata, + max_workers=parsed_arg.max_workers, + project_dir=parsed_arg.project_dir, + priority=parsed_arg.priority, + project_cache_dir=parsed_arg.project_cache_dir, + update_locks=parsed_arg.update_locks, + warning_mode=parsed_arg.warning_mode, + exclude_task=parsed_arg.exclude_task, + system_prop=GradleCLIOptions.parse_properties(parsed_arg.system_prop) if parsed_arg.system_prop else None, + project_prop=( + GradleCLIOptions.parse_properties(parsed_arg.project_prop) if parsed_arg.project_prop else None + ), + tasks=parsed_arg.tasks, + ) + + @staticmethod + def parse_properties(props: list[str]) -> dict[str, str]: + """Return a dictionary that maps between a property and its value. + + Each property definition value in `props` can have either of these format: + - `property=value` (e.g. `property=value` from `-Dproperty=value`): this will + be parsed into a dictionary mapping of `"property": "value"`. + Both the key and value of this mapping is of type string. + - `property` (e.g. `property` from `-Dproperty`): this will be parsed into a + dictionary mapping of `"property": `. + + Parameters + ---------- + props: list[str] + The list of properties definition provided in the cli command. + This is the list parsed by argparse. + + Returns + ------- + dict[str, str]: + The properties dictionary. 
+ + Examples + -------- + >>> GradleCLIOptions.parse_properties(["boo=true", "foo=1", "bar"]) + {'boo': 'true', 'foo': '1', 'bar': ''} + """ + system_props = {} + for ele in props: + prop_name, _, prop_val = ele.partition("=") + + if not prop_val: + system_props[prop_name] = "" + else: + system_props[prop_name] = prop_val + + return system_props + + def to_option_cmds(self) -> list[str]: + """Return the options as a list of strings.""" + result = self.to_cmd_no_tasks() + if self.tasks: + for task in self.tasks: + result.append(task) + + return result + + def to_cmd_no_tasks(self) -> list[str]: + """Return the options only as a list of string. + + Only enabled options are returned. + + Returns + ------- + list[str] + The enabled options. + """ + result = [] + + if self.help_: + result.append("-h") + + if self.no_rebuild: + result.append("-a") + + if self.continue_: + result.append("--continue") + + if self.debug: + result.append("-d") + + if self.export_keys: + result.append("--export-keys") + + if self.foreground: + result.append("--foreground") + + if self.info: + result.append("-i") + + if self.offline: + result.append("--offline") + + if self.profile: + result.append("--profile") + + if self.quiet: + result.append("-q") + + if self.refresh_dependencies: + result.append("--refresh-dependencies") + + if self.refresh_keys: + result.append("--refresh-keys") + + if self.rerun_tasks: + result.append("--rerun-tasks") + + if self.full_stacktrace: + result.append("-S") + + if self.stacktrace: + result.append("-s") + + if self.status: + result.append("--status") + + if self.stop: + result.append("--stop") + + if self.continuous: + result.append("-t") + + if self.version: + result.append("-v") + + if self.warn: + result.append("-w") + + if self.write_locks: + result.append("--write-locks") + + if self.build_cache is not None: + if self.build_cache is True: + result.append("--build-cache") + else: + result.append("--no-build-cache") + + if self.configuration_cache is not 
None: + if self.configuration_cache is True: + result.append("--configuration-cache") + else: + result.append("--no-configuration-cache") + + if self.configure_on_demand is not None: + if self.configure_on_demand is True: + result.append("--configure-on-demand") + else: + result.append("--no-configure-on-demand") + + if self.daemon is not None: + if self.daemon is True: + result.append("--daemon") + else: + result.append("--no-daemon") + + if self.parallel is not None: + if self.parallel is True: + result.append("--parallel") + else: + result.append("--no-parallel") + + if self.scan is not None: + if self.scan is True: + result.append("--scan") + else: + result.append("--no-scan") + + if self.watch_fs is not None: + if self.watch_fs is True: + result.append("--watch-fs") + else: + result.append("--no-watch-fs") + + if self.build_file: + result.append("-b") + result.append(self.build_file) + + if self.settings_file: + result.append("-c") + result.append(self.settings_file) + + if self.configuration_cache_problems: + result.append("--configuration-cache-problems") + result.append(self.configuration_cache_problems) + + if self.gradle_user_home: + result.append("-g") + result.append(self.gradle_user_home) + + if self.init_script: + result.append("-I") + result.append(self.init_script) + + if self.include_build: + result.append("--include-build") + result.append(self.include_build) + + if self.write_verification_metadata: + result.append("-M") + result.append(self.write_verification_metadata) + + if self.max_workers: + result.append("--max-workers") + result.append(self.max_workers) + + if self.project_dir: + result.append("-p") + result.append(self.project_dir) + + if self.priority: + result.append("--priority") + result.append(self.priority) + + if self.project_cache_dir: + result.append("--project-cache-dir") + result.append(self.project_cache_dir) + + if self.update_locks: + result.append("--update-locks") + result.append(self.update_locks) + + if self.warning_mode: 
+ result.append("--warning-mode") + result.append(self.warning_mode) + + if self.exclude_task: + for task in self.exclude_task: + result.append("-x") + result.append(task) + + if self.system_prop: + for key, value in self.system_prop.items(): + if value: + result.append(f"-D{key}={value}") + else: + result.append(f"-D{key}") + + if self.project_prop: + for key, value in self.project_prop.items(): + if value: + result.append(f"-P{key}={value}") + else: + result.append(f"-P{key}") + + return result + + +@dataclass +class GradleCLICommand: + """The class that stores the values of a Gradle CLI Command.""" + + executable: str + options: GradleCLIOptions + + def to_cmds(self) -> list[str]: + """Return the CLI Command as a list of strings.""" + result = [] + result.append(self.executable) + result.extend(self.options.to_option_cmds()) + return result diff --git a/src/macaron/build_spec_generator/cli_command_parser/gradle_cli_parser.py b/src/macaron/build_spec_generator/cli_command_parser/gradle_cli_parser.py new file mode 100644 index 000000000..c66c6c4e5 --- /dev/null +++ b/src/macaron/build_spec_generator/cli_command_parser/gradle_cli_parser.py @@ -0,0 +1,701 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
+ +"""This module contains the Gradle CLI Command parser.""" + +import argparse +import logging +import os +from collections.abc import Mapping +from copy import deepcopy +from dataclasses import dataclass, field +from typing import Any, TypeGuard + +from macaron.build_spec_generator.cli_command_parser import ( + OptionDef, + PatchCommandBuildTool, + is_dict_of_str_to_str_or_none, + is_list_of_strs, + patch_mapping, +) +from macaron.build_spec_generator.cli_command_parser.gradle_cli_command import GradleCLICommand, GradleCLIOptions +from macaron.errors import CommandLineParseError, PatchBuildCommandError + +logger: logging.Logger = logging.getLogger(__name__) + + +GradleOptionPatchValueType = str | list[str] | bool | dict[str, str | None] + + +@dataclass +class GradleOptionalFlag(OptionDef[bool]): + """This option represents an optional flag in Gradle CLI command. + + For example: + - Has one short name -d/--debug + - Has no short name --continue + - Has multiple short names -?/-h/--help + + This option can have multiple values, and it's not required. + """ + + short_names: list[str] | None + + # Right now this is used for --continue and --help where the default attribute name for it + # in the returned argparse.Namespace is "continue" which conflicts with a Python keyword and + # "help" which conflicts with the built-in function help(). 
+ dest: str | None = field(default=None) + + def is_valid_patch_option(self, patch: Any) -> TypeGuard[bool]: + """Return True if the provide patch value is compatible with the internal type of this option.""" + return isinstance(patch, bool) + + def add_itself_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: + """Add a new argument to argparser.ArgumentParser representing this option.""" + kwargs: dict[str, Any] = {} + + kwargs["action"] = "store_true" + if self.dest: + kwargs["dest"] = self.dest + + if self.short_names: + arg_parse.add_argument( + *(self.short_names + [self.long_name]), + **kwargs, + ) + else: + arg_parse.add_argument( + self.long_name, + **kwargs, + ) + + def get_patch_type_str(self) -> str: + """Return the expected type for the patch value as string.""" + return "bool" + + +@dataclass +class GradleOptionalNegateableFlag(OptionDef[bool]): + """This option represents an optional negateable flag in Gradle CLI command. + + For example: --build-cache/--no-build-cache + """ + + def is_valid_patch_option(self, patch: Any) -> TypeGuard[bool]: + """Return True if the provide patch value is compatible with the internal type of this option.""" + return isinstance(patch, bool) + + @staticmethod + def get_negated_long_name(long_name: str) -> str: + """Return the negated version of a long option name.""" + return f"--no-{long_name.removeprefix('--')}" + + def add_itself_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: + """Add a new argument to argparser.ArgumentParser representing this option.""" + # We allow providing both the normal and negated form. + negated_long_name = self.get_negated_long_name(self.long_name) + dest = self.long_name.removeprefix("--").replace("-", "_") + + # We set the default to None so that we don't print out these options + # if they are not provided in the original build command in to_cmd_tasks(). 
+ arg_parse.add_argument( + self.long_name, + action="store_true", + default=None, + dest=dest, + ) + + arg_parse.add_argument( + negated_long_name, + action="store_false", + default=None, + dest=dest, + ) + + def get_patch_type_str(self) -> str: + """Return the expected type for the patch value as string.""" + return "bool" + + +@dataclass +class GradleSingleValue(OptionDef[str]): + """This option represents an option that takes a value in Grale CLI command.""" + + short_name: str | None + + def is_valid_patch_option(self, patch: Any) -> TypeGuard[str]: + """Return True if the provide patch value is compatible with the internal type of this option.""" + return isinstance(patch, str) + + def add_itself_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: + """Add a new argument to argparser.ArgumentParser representing this option.""" + if self.short_name: + arg_parse.add_argument( + *(self.short_name, self.long_name), + ) + else: + arg_parse.add_argument( + self.long_name, + ) + + def get_patch_type_str(self) -> str: + """Return the expected type for the patch value as string.""" + return "str" + + +@dataclass +class GradlePropeties(OptionDef[dict[str, str | None]]): + """This option represents an option used to define properties values of a Gradle CLI command. + + This option can be defined multiple times and the values are appended into a list of string in argparse. + However, it's stored internally as a dictionary mapping between the system property name to its value. 
+ + In Gradle there are 2 options of this type: + - -D/--system-prop + - -P/--project-prop + """ + + short_name: str + + def is_valid_patch_option(self, patch: Any) -> TypeGuard[dict[str, str | None]]: + """Return True if the provide patch value is compatible with the internal type of this option.""" + return is_dict_of_str_to_str_or_none(patch) + + def add_itself_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: + """Add a new argument to argparser.ArgumentParser representing this option.""" + arg_parse.add_argument( + *(self.short_name, self.long_name), + action="append", + ) + + def get_patch_type_str(self) -> str: + """Return the expected type for the patch value as string.""" + return "dict[str, str | None]" + + +@dataclass +class GradleTask(OptionDef[list[str]]): + """This option represents the positional task option in Maven CLI command. + + argparse.Namespace stores this as a list of string. This is stored internally as a list of string. + """ + + def is_valid_patch_option(self, patch: Any) -> TypeGuard[list[str]]: + """Return True if the provide patch value is compatible with the internal type of this option.""" + return is_list_of_strs(patch) + + def add_itself_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: + """Add a new argument to argparser.ArgumentParser representing this option.""" + # Doesn't require to allow cases like "gradle --help". + arg_parse.add_argument( + self.long_name, + nargs="*", + ) + + def get_patch_type_str(self) -> str: + """Return the expected type for the patch value as string.""" + return "list[str]" + + +@dataclass +class GradleAppendedList(OptionDef[list[str]]): + """This option represents an option that can be specify multiple times and they all appended to a list. 
+ + For example, one can exclude multiple tasks with + gradle --exclude-task taskA --exclude-task taskB + """ + + short_name: str + + def is_valid_patch_option(self, patch: Any) -> TypeGuard[list[str]]: + """Return True if the provide patch value is compatible with the internal type of this option.""" + return is_list_of_strs(patch) + + def add_itself_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None: + """Add a new argument to argparser.ArgumentParser representing this option.""" + arg_parse.add_argument( + *(self.short_name, self.long_name), + action="append", + ) + + def get_patch_type_str(self) -> str: + """Return the expected type for the patch value as string.""" + return "list[str]" + + +# TODO: some value option only allows you to provide certain values +# For example: --console allows "plain", "auto", "rich" or "verbose". +# They are right now not enforced. We need to think whether we want to enforce them. +GRADLE_OPTION_DEF: list[OptionDef] = [ + GradleOptionalFlag( + short_names=["-?", "-h"], + long_name="--help", + dest="help_", + ), + GradleOptionalFlag( + short_names=["-a"], + long_name="--no-rebuild", + ), + GradleOptionalFlag( + short_names=None, + long_name="--continue", + dest="continue_", + ), + GradleOptionalFlag( + short_names=["-d"], + long_name="--debug", + ), + GradleOptionalFlag( + short_names=None, + long_name="--export-keys", + ), + GradleOptionalFlag( + short_names=None, + long_name="--foreground", + ), + GradleOptionalFlag( + short_names=["-i"], + long_name="--info", + ), + GradleOptionalFlag( + short_names=None, + long_name="--offline", + ), + GradleOptionalFlag( + short_names=None, + long_name="--profile", + ), + GradleOptionalFlag( + short_names=["-q"], + long_name="--quiet", + ), + GradleOptionalFlag( + short_names=None, + long_name="--refresh-dependencies", + ), + GradleOptionalFlag( + short_names=None, + long_name="--refresh-keys", + ), + GradleOptionalFlag( + short_names=None, + long_name="--rerun-tasks", + ), + 
GradleOptionalFlag( + short_names=["-S"], + long_name="--full-stacktrace", + ), + GradleOptionalFlag( + short_names=["-s"], + long_name="--stacktrace", + ), + GradleOptionalFlag( + short_names=None, + long_name="--status", + ), + GradleOptionalFlag( + short_names=None, + long_name="--stop", + ), + GradleOptionalFlag( + short_names=["-t"], + long_name="--continuous", + ), + GradleOptionalFlag( + short_names=["-v"], + long_name="--version", + ), + GradleOptionalFlag( + short_names=["-w"], + long_name="--warn", + ), + GradleOptionalFlag( + short_names=None, + long_name="--write-locks", + ), + GradleOptionalNegateableFlag( + long_name="--build-cache", + ), + GradleOptionalNegateableFlag( + long_name="--configuration-cache", + ), + GradleOptionalNegateableFlag( + long_name="--configure-on-demand", + ), + GradleOptionalNegateableFlag( + long_name="--daemon", + ), + GradleOptionalNegateableFlag( + long_name="--parallel", + ), + GradleOptionalNegateableFlag( + long_name="--scan", + ), + GradleOptionalNegateableFlag( + long_name="--watch-fs", + ), + # This has been validated by setting up a minimal gradle project. Gradle version 8.14.2 + # gradle init --type java-library + # And use default values for any prompted configuration. 
+ # Then append this block of code into src/build.gradle + # + # task boo { + # doLast { + # println "Running task: boo" + # } + # } + # task foo { + # doLast { + # println "Running task: foo" + # } + # } + # task bar { + # doLast { + # println "Running task: bar" + # } + # } + # task everything(dependsOn: ['boo', 'foo']) { + # doLast { + # println "Running task: everything" + # } + # } + # And then run ./gradlew everything -x boo -x foo + # > Task :lib:bar + # Running task: gamma + # > Task :lib:everything + # Running task: everything + GradleAppendedList( + short_name="-x", + long_name="--exclude-task", + ), + # TODO: determine which of these options can be provided multiple times + GradleSingleValue( + short_name="-b", + long_name="--build-file", + ), + GradleSingleValue( + short_name="-c", + long_name="--settings-file", + ), + GradleSingleValue( + short_name=None, + long_name="--configuration-cache-problems", + ), + GradleSingleValue( + short_name=None, + long_name="--console", + ), + GradleSingleValue( + short_name="-F", + long_name="--dependency-verification", + ), + GradleSingleValue( + short_name="-g", + long_name="--gradle-user-home", + ), + GradleSingleValue( + short_name="-I", + long_name="--init-script", + ), + GradleSingleValue( + short_name=None, + long_name="--include-build", + ), + GradleSingleValue( + short_name="-M", + long_name="--write-verification-metadata", + ), + GradleSingleValue( + short_name=None, + long_name="--max-workers", + ), + GradleSingleValue( + short_name="-p", + long_name="--project-dir", + ), + GradleSingleValue( + short_name=None, + long_name="--priority", + ), + GradleSingleValue( + short_name=None, + long_name="--project-cache-dir", + ), + GradleSingleValue( + short_name=None, + long_name="--update-locks", + ), + GradleSingleValue( + short_name=None, + long_name="--warning-mode", + ), + GradlePropeties( + short_name="-D", + long_name="--system-prop", + ), + GradlePropeties( + short_name="-P", + long_name="--project-prop", + 
), + GradleTask( + long_name="tasks", + ), +] + + +class GradleCLICommandParser: + """A Gradle CLI Command Parser.""" + + ACCEPTABLE_EXECUTABLE = ["gradle", "gradlew"] + + def __init__(self) -> None: + """Initialize the instance.""" + self.arg_parser = argparse.ArgumentParser( + description="Parse Gradle CLI command", + prog="mvn", + add_help=False, + # https://docs.python.org/3/library/argparse.html#exit-on-error + # Best effort of parsing the build command. Therefore, we don't want to exit on error. + exit_on_error=False, + ) + + # A mapping between the long name to its option definition. + self.option_defs: dict[str, OptionDef] = {} + + for opt_def in GRADLE_OPTION_DEF: + opt_def.add_itself_to_arg_parser(self.arg_parser) + + self.option_defs[opt_def.long_name] = opt_def + + self.build_tool = PatchCommandBuildTool.GRADLE + + def is_build_tool(self, executable_path: str) -> bool: + """Return True if ``executable_path`` ends the accepted executable for this build tool. + + Parameters + ---------- + executable_path: str + The executable component of a CLI command. + + Returns + ------- + bool + """ + return os.path.basename(executable_path) in GradleCLICommandParser.ACCEPTABLE_EXECUTABLE + + def validate_patch(self, patch: Mapping[str, GradleOptionPatchValueType | None]) -> bool: + """Return True if the patch conforms to the expected format.""" + for patch_name, patch_value in patch.items(): + opt_def = self.option_defs.get(patch_name) + if not opt_def: + logger.error("Cannot find any option that matches %s", patch_name) + return False + + if patch_value is None: + continue + + if not opt_def.is_valid_patch_option(patch_value): + logger.error( + "The patch value %s of %s is not in the correct type. Expect %s.", + patch_value, + patch_name, + opt_def.get_patch_type_str(), + ) + return False + + return True + + def parse(self, cmd_list: list[str]) -> GradleCLICommand: + """Parse the Gradle CLI Command. 
+ + Parameters + ---------- + cmd_list: list[str] + The Gradle CLI Command as list of strings. + + Returns + ------- + GradleCLICommand + The GradleCLICommand instance. + + Raises + ------ + CommandLineParseError + If an error happens when parsing the Gradle CLI Command. + """ + if not cmd_list: + raise CommandLineParseError("The provided cmd list is empty.") + + exe_path = cmd_list[0] + options = cmd_list[1:] + + if os.path.basename(exe_path) not in GradleCLICommandParser.ACCEPTABLE_EXECUTABLE: + raise CommandLineParseError(f"{exe_path} is not an acceptable Gradle executable path.") + + # TODO: because our parser is not completed for all cases, should we be more relaxed and use + # parse_unknown_options? + try: + parsed_opts = self.arg_parser.parse_args(options) + except argparse.ArgumentError as error: + raise CommandLineParseError(f"Failed to parse {' '.join(options)}.") from error + # Even though we have set `exit_on_error`, argparse still exists unexpectedly in some + # cases. This has been confirmed to be a bug in the argparse library implementation. + # https://github.com/python/cpython/issues/121018. + # This is fixed in Python3.12, but not Python3.11 + except SystemExit as sys_exit_err: + raise CommandLineParseError( + f"Failed to parse the Gradle CLI Options {' '.join(options)}." 
def _patch_properties_mapping(
    self,
    original_props: dict[str, str],
    option_long_name: str,
    patch_value: GradleOptionPatchValueType,
) -> dict[str, str]:
    """Return ``original_props`` patched with ``patch_value`` for a property type option.

    Parameters
    ----------
    original_props : dict[str, str]
        The properties currently set for the option.
    option_long_name : str
        The long name of the option being patched. It must refer to a ``GradlePropeties`` definition.
    patch_value : GradleOptionPatchValueType
        The patch value for the option.

    Returns
    -------
    dict[str, str]
        The result of ``patch_mapping`` applied on the original properties.

    Raises
    ------
    PatchBuildCommandError
        If the option is not a property type option or the patch value is not valid for it.
    """
    option_def = self.option_defs.get(option_long_name)
    if not (option_def and isinstance(option_def, GradlePropeties)):
        raise PatchBuildCommandError(f"{option_long_name} from the patch is not a property type option.")

    if option_def.is_valid_patch_option(patch_value):
        return patch_mapping(original=original_props, patch=patch_value)

    raise PatchBuildCommandError(f"Incorrect runtime type for patch option {option_long_name}, value: {patch_value}.")


def apply_patch(
    self,
    cli_command: GradleCLICommand,
    options_patch: Mapping[str, GradleOptionPatchValueType | None],
) -> GradleCLICommand:
    """Return a new Gradle CLI command with patched options and the same executable.

    ``options_patch`` maps the long name of a Gradle CLI option (e.g. ``--continue``, ``--build-cache``)
    to its patch value. Use the key ``tasks`` to patch the tasks.

    The expected type of a patch value depends on the option:

    - Optional flag without a value (e.g. ``-d/--debug``): a boolean. True sets the flag, False unsets it.

    - ``-D/--system-prop`` and ``-P/--project-prop`` ONLY: a mapping between the property name and its
      value. A None value "unsets" the property.

    - ``-x/--exclude-task``: a list of strings.

    - Options with a negated form (e.g. ``--build-cache/--no-build-cache``): the key is the normal long
      name (``--build-cache``) and the value is a boolean. True sets ``--build-cache`` and False sets
      ``--no-build-cache``.

    - Any other option that expects a value (e.g. ``-c/--setting-file``): a string.

    None can be provided for ANY option to forcefully remove it from the original build command.

    Parameters
    ----------
    cli_command : GradleCLICommand
        The original Gradle command, as a ``GradleCLICommand`` object from ``GradleCLICommandParser.parse(...)``
    options_patch : Mapping[str, GradleOptionPatchValueType | None]
        The patch values.

    Returns
    -------
    GradleCLICommand
        The patched command as a new ``GradleCLICommand`` object.

    Raises
    ------
    PatchBuildCommandError
        If an error happens during the patching process.
    """
    patched_options = self.apply_option_patch(cli_command.options, patch=options_patch)
    return GradleCLICommand(executable=cli_command.executable, options=patched_options)


def apply_option_patch(
    self,
    gradle_cli_options: GradleCLIOptions,
    patch: Mapping[str, GradleOptionPatchValueType | None],
) -> GradleCLIOptions:
    """Patch the Gradle CLI Options and return a new copy.

    Parameters
    ----------
    gradle_cli_options: GradleCLIOptions
        The Gradle CLI Options to patch. This object is not modified.
    patch: Mapping[str, GradleOptionPatchValueType | None]
        A mapping between the long name of an option and its patch value.

    Returns
    -------
    GradleCLIOptions
        The new patched gradle cli options.

    Raises
    ------
    PatchBuildCommandError
        If an error happens during the patching process.
    """
    if not self.validate_patch(patch):
        raise PatchBuildCommandError("The patch is invalid.")

    # Long names whose attribute name does not follow the generic rule below.
    special_attr_names = {"--help": "help_", "--continue": "continue_"}
    # Options whose value is a mapping that is merged with the existing one instead of replaced.
    property_options = ("--project-prop", "--system-prop")

    # Work on a copy so that the Gradle CLI Options given by the caller stay untouched.
    patched_options = deepcopy(gradle_cli_options)

    for long_name, value in patch.items():
        # Generic rule: remove the "--" prefix and replace every "-" with "_".
        attr_name = special_attr_names.get(long_name) or long_name.removeprefix("--").replace("-", "_")

        # A None value removes the option, for property options too.
        if value is None or long_name not in property_options:
            setattr(patched_options, attr_name, value)
            continue

        merged_props = self._patch_properties_mapping(
            original_props=getattr(patched_options, attr_name) or {},
            option_long_name=long_name,
            patch_value=value,
        )
        setattr(patched_options, attr_name, merged_props)

    return patched_options
+ +"""This module contains the classes that represent components of a Maven CLI Command.""" + +import argparse +from dataclasses import dataclass + + +@dataclass +class MavenCLIOptions: + """The class that stores the values of options parsed from a Maven CLI Command.""" + + # Optional flag. + also_make: bool | None + also_make_dependents: bool | None + batch_mode: bool | None + strict_checksums: bool | None + lax_checksums: bool | None + errors: bool | None + fail_at_end: bool | None + fail_fast: bool | None + fail_never: bool | None + help_: bool | None + non_recursive: bool | None + no_snapshot_updates: bool | None + no_transfer_progress: bool | None + quiet: bool | None + version: bool | None + show_version: bool | None + debug: bool | None + offline: bool | None + update_snapshots: bool | None + + # Single Value Option. + builder: str | None + encrypt_master_password: str | None + encrypt_password: str | None + file: str | None + global_settings: str | None + global_toolchains: str | None + log_file: str | None + resume_from: str | None + settings: str | None + toolchains: str | None + threads: str | None + + # Comma-delim list option. + activate_profiles: list[str] | None + projects: list[str] | None + + # System properties definition. + define: dict[str, str] | None + + # Maven goals and plugin phases. + goals: list[str] | None + + @classmethod + def from_parsed_arg( + cls, + parsed_arg: argparse.Namespace, + ) -> "MavenCLIOptions": + """Initialize the instance from the the argparse.Namespace object. + + Parameters + ---------- + parsed_arg : argparse.Namespace + The argparse.Namespace object obtained from parsing the CLI Command. + + Returns + ------- + MavenCLIOptions + The MavenCLIOptions object. 
+ """ + return cls( + also_make=parsed_arg.also_make, + also_make_dependents=parsed_arg.also_make_dependents, + batch_mode=parsed_arg.batch_mode, + builder=parsed_arg.builder, + strict_checksums=parsed_arg.strict_checksums, + lax_checksums=parsed_arg.lax_checksums, + define=MavenCLIOptions.parse_system_properties(parsed_arg.define) if parsed_arg.define else None, + errors=parsed_arg.errors, + encrypt_master_password=parsed_arg.encrypt_master_password, + encrypt_password=parsed_arg.encrypt_password, + file=parsed_arg.file, + fail_at_end=parsed_arg.fail_at_end, + fail_fast=parsed_arg.fail_fast, + fail_never=parsed_arg.fail_never, + global_settings=parsed_arg.global_settings, + global_toolchains=parsed_arg.global_toolchains, + help_=parsed_arg.help_, + log_file=parsed_arg.log_file, + non_recursive=parsed_arg.non_recursive, + no_snapshot_updates=parsed_arg.no_snapshot_updates, + no_transfer_progress=parsed_arg.no_transfer_progress, + offline=parsed_arg.offline, + activate_profiles=( + MavenCLIOptions.parse_comma_sep_list(parsed_arg.activate_profiles) + if parsed_arg.activate_profiles + else None + ), + projects=MavenCLIOptions.parse_comma_sep_list(parsed_arg.projects) if parsed_arg.projects else None, + quiet=parsed_arg.quiet, + resume_from=parsed_arg.resume_from, + settings=parsed_arg.settings, + toolchains=parsed_arg.toolchains, + threads=parsed_arg.threads, + update_snapshots=parsed_arg.update_snapshots, + version=parsed_arg.version, + show_version=parsed_arg.show_version, + debug=parsed_arg.debug, + goals=parsed_arg.goals, + ) + + @staticmethod + def parse_system_properties(props: list[str]) -> dict[str, str]: + """Return a dictionary that maps between a system propertie and its value. + + Each property definition value in `props` can have either of these format: + - `property=value` (e.g. `-Dproperty=value`): this will be parsed into a + dictionary mapping of `"property": "value"`. Both the key and value + of this mapping is of type string. + - `property` (e.g. 
`-Dproperty`): this will be parsed into a dictionary mapping of `"property": "true"`. + + Parameters + ---------- + props: list[str] + The list of values provided to -D/--define in the cli command. + This is the list parsed by argparse. + + Returns + ------- + dict[str, str]: + The system properties dictionary. + + Examples + -------- + >>> MavenCLIOptions.parse_system_properties(["boo=true", "foo=1", "bar"]) + {'boo': 'true', 'foo': '1', 'bar': 'true'} + """ + system_props = {} + for ele in props: + prop_name, _, prop_val = ele.partition("=") + # Allow the subsequent definition override the previous one. + # This follows the way Maven is resolving system property. + # For example: + # mvn help:evaluate -Da=foo -Da=bar -Dexpression=a -q -DforceStdout + # => result for `a` is bar + # If ele doesn't have "=", for example `-Dmaven.skip.test`, we store + # the value using the value "true" string. + # + # For example: + # Maven evaluates the system property maven.skip.test to be "true" in these two commands + # mvn clean package -Dmaven.skip.test=true + # mvn clean package -Dmaven.skip.test + # To check how Maven evaluate the expression, run these commands on any project that uses maven. + # mvn help:evaluate -Dmaven.skip.test -Dexpression=maven.skip.test -q -DforceStdout + # mvn help:evaluate -Dmaven.skip.test=true -Dexpression=maven.skip.test -q -DforceStdout + if not prop_val: + system_props[prop_name] = "true" + else: + system_props[prop_name] = prop_val + + return system_props + + @staticmethod + def parse_comma_sep_list(input_val: str) -> list[str]: + """Split a comma delimited string and return a list of string elements. + + Parameters + ---------- + input_val: str + The comma delimited string. + + Returns + ------- + list[str] + The list of string elements. 
+ + Examples + -------- + >>> MavenCLIOptions.parse_comma_sep_list("examples,release") + ['examples', 'release'] + """ + return input_val.split(",") + + def to_option_cmds(self) -> list[str]: + """Return the options as a list of strings.""" + result = self.to_cmd_no_goals() + if self.goals: + for goal in self.goals: + result.append(goal) + + return result + + def to_cmd_no_goals(self) -> list[str]: + """Return the options only as a list of string. + + Only enabled options are returned. + + Returns + ------- + list[str] + The enabled options. + """ + result = [] + + if self.also_make: + result.append("-am") + + if self.also_make_dependents: + result.append("-amd") + + if self.batch_mode: + result.append("-B") + + if self.builder: + result.extend(f"-b {self.builder}".split()) + + if self.strict_checksums: + result.append("-C") + + if self.lax_checksums: + result.append("-c") + + if self.define: + for key, value in self.define.items(): + result.append(f"-D{key}={value}") + + if self.errors: + result.append("-e") + + if self.encrypt_master_password: + result.extend(f"-emp {self.encrypt_master_password}".split()) + + if self.encrypt_password: + result.extend(f"-ep {self.encrypt_password}".split()) + + if self.file: + result.extend(f"-f {self.file}".split()) + + if self.fail_at_end: + result.append("-fae") + + if self.fail_fast: + result.append("-ff") + + if self.fail_never: + result.append("-fn") + + if self.global_settings: + result.extend(f"-gs {self.global_settings}".split()) + + if self.global_toolchains: + result.extend(f"-gt {self.global_toolchains}".split()) + + if self.help_: + result.append("-h") + + if self.log_file: + result.extend(f"-l {self.log_file}".split()) + + if self.non_recursive: + result.append("-N") + + if self.no_snapshot_updates: + result.append("-U") + + if self.no_transfer_progress: + result.append("-ntp") + + if self.offline: + result.append("-o") + + if self.activate_profiles: + result.extend(f"-P {','.join(self.activate_profiles)}".split()) 
+ + if self.projects: + result.extend(f"-pl {','.join(self.projects)}".split()) + + if self.quiet: + result.append("-q") + + if self.resume_from: + result.extend(f"-rf {self.resume_from}".split()) + + if self.settings: + result.extend(f"-s {self.settings}".split()) + + if self.toolchains: + result.extend(f"-t {self.toolchains}".split()) + + if self.threads: + result.extend(f"-T {self.threads}".split()) + + if self.update_snapshots: + result.append("-U") + + if self.version: + result.append("-v") + + if self.show_version: + result.append("-V") + + if self.debug: + result.append("-X") + + return result + + +@dataclass +class MavenCLICommand: + """The class that stores the values of a Maven CLI Command.""" + + executable: str + options: MavenCLIOptions + + def to_cmds(self) -> list[str]: + """Return the CLI Command as a list of strings.""" + result = [] + result.append(self.executable) + result.extend(self.options.to_option_cmds()) + return result diff --git a/src/macaron/build_spec_generator/cli_command_parser/maven_cli_parser.py b/src/macaron/build_spec_generator/cli_command_parser/maven_cli_parser.py new file mode 100644 index 000000000..454f84cb0 --- /dev/null +++ b/src/macaron/build_spec_generator/cli_command_parser/maven_cli_parser.py @@ -0,0 +1,594 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
@dataclass
class MavenOptionalFlag(OptionDef[bool]):
    """Definition of a Maven CLI option that is a plain flag and takes no value.

    For example: --debug/-X

    A short form for the option is required.
    """

    short_name: str

    # Overrides the attribute name used for this option in the returned argparse.Namespace.
    # Right now this is only used for --help, so that its attribute name does not conflict
    # with the built-in function help().
    dest: str | None = field(default=None)

    def is_valid_patch_option(self, patch: Any) -> TypeGuard[bool]:
        """Return True if the provided patch value is a boolean."""
        return isinstance(patch, bool)

    def add_itself_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None:
        """Register this flag, under its short and long name, to the argument parser."""
        extra_kwargs: dict[str, Any] = {"action": "store_true"}
        if self.dest:
            extra_kwargs["dest"] = self.dest

        arg_parse.add_argument(self.short_name, self.long_name, **extra_kwargs)

    def get_patch_type_str(self) -> str:
        """Return the expected type for the patch value as string."""
        return "bool"


@dataclass
class MavenSingleValue(OptionDef[str]):
    """Definition of a Maven CLI option that takes exactly one value.

    For example: "--settings ./path/to/pom.xml"

    A short form for the option is required.
    """

    short_name: str

    def is_valid_patch_option(self, patch: Any) -> TypeGuard[str]:
        """Return True if the provided patch value is a string."""
        return isinstance(patch, str)

    def add_itself_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None:
        """Register this option, under its short and long name, to the argument parser."""
        arg_parse.add_argument(self.short_name, self.long_name)

    def get_patch_type_str(self) -> str:
        """Return the expected type for the patch value as string."""
        return "str"


@dataclass
class MavenCommaDelimList(OptionDef[list[str]]):
    """Definition of a Maven CLI option that takes a comma delimited value.

    This option can be defined one time only and the value is stored as a string in argparse.
    However, it's stored internally as list of strings obtained by splitting its original value in argparse
    using comma as the delimiter.

    For example: "-P profile1,profile2,profile3"
    will be stored as ["profile1", "profile2", "profile3"]

    A short form for the option is required.
    """

    short_name: str

    def is_valid_patch_option(self, patch: Any) -> TypeGuard[list[str]]:
        """Return True if the provided patch value is a list of strings."""
        return is_list_of_strs(patch)

    def add_itself_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None:
        """Register this option, under its short and long name, to the argument parser."""
        arg_parse.add_argument(self.short_name, self.long_name)

    def get_patch_type_str(self) -> str:
        """Return the expected type for the patch value as string."""
        return "list"


@dataclass
class MavenSystemPropeties(OptionDef[dict[str, str | None]]):
    """Definition of the -D/--define option of a Maven CLI command.

    This option can be defined multiple times and the values are appended into a list of string in argparse.
    However, it's stored internally as a dictionary mapping between the system property name to its value.

    For example: ``-Dmaven.skip.test=true -Drat.skip=true``
    will be stored as ``{"maven.skip.test": "true", "rat.skip": "true"}``

    A short form for the option is required.
    """

    short_name: str

    def is_valid_patch_option(self, patch: Any) -> TypeGuard[dict[str, str | None]]:
        """Return True if the provided patch value maps strings to strings or None."""
        return is_dict_of_str_to_str_or_none(patch)

    def add_itself_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None:
        """Register this option to the argument parser, collecting every occurrence into a list."""
        arg_parse.add_argument(self.short_name, self.long_name, action="append")

    def get_patch_type_str(self) -> str:
        """Return the expected type for the patch value as string."""
        return "dict[str, str | None]"


@dataclass
class MavenGoalPhase(OptionDef[list[str]]):
    """Definition of the positional goal/plugin-phase arguments of a Maven CLI command.

    argparse.Namespace stores this as a list of string. This is stored internally as a list of string.
    """

    def is_valid_patch_option(self, patch: Any) -> TypeGuard[list[str]]:
        """Return True if the provided patch value is a list of strings."""
        return is_list_of_strs(patch)

    def add_itself_to_arg_parser(self, arg_parse: argparse.ArgumentParser) -> None:
        """Register the positional argument, under the long name, to the argument parser."""
        # Zero goals must be accepted to allow cases like "mvn --help".
        arg_parse.add_argument(self.long_name, nargs="*")

    def get_patch_type_str(self) -> str:
        """Return the expected type for the patch value as string."""
        return "list[str]"


# We intend to support Maven version 3.6.3 - 3.9
MAVEN_OPTION_DEF: list[OptionDef] = [
    # Optional flags.
    MavenOptionalFlag(short_name="-am", long_name="--also-make"),
    MavenOptionalFlag(short_name="-amd", long_name="--also-make-dependents"),
    MavenOptionalFlag(short_name="-B", long_name="--batch-mode"),
    MavenOptionalFlag(short_name="-C", long_name="--strict-checksums"),
    MavenOptionalFlag(short_name="-c", long_name="--lax-checksums"),
    MavenOptionalFlag(short_name="-cpu", long_name="--check-plugin-updates"),
    MavenOptionalFlag(short_name="-e", long_name="--errors"),
    MavenOptionalFlag(short_name="-fae", long_name="--fail-at-end"),
    MavenOptionalFlag(short_name="-ff", long_name="--fail-fast"),
    MavenOptionalFlag(short_name="-fn", long_name="--fail-never"),
    MavenOptionalFlag(short_name="-h", long_name="--help", dest="help_"),
    MavenOptionalFlag(short_name="-llr", long_name="--legacy-local-repository"),
    MavenOptionalFlag(short_name="-N", long_name="--non-recursive"),
    MavenOptionalFlag(short_name="-nsu", long_name="--no-snapshot-updates"),
    MavenOptionalFlag(short_name="-ntp", long_name="--no-transfer-progress"),
    MavenOptionalFlag(short_name="-npu", long_name="--no-plugin-updates"),
    MavenOptionalFlag(short_name="-npr", long_name="--no-plugin-registry"),
    MavenOptionalFlag(short_name="-o", long_name="--offline"),
    MavenOptionalFlag(short_name="-q", long_name="--quiet"),
    MavenOptionalFlag(short_name="-U", long_name="--update-snapshots"),
    MavenOptionalFlag(short_name="-up", long_name="--update-plugins"),
    MavenOptionalFlag(short_name="-v", long_name="--version"),
    MavenOptionalFlag(short_name="-V", long_name="--show-version"),
    MavenOptionalFlag(short_name="-X", long_name="--debug"),
    # Positional goals and plugin phases.
    MavenGoalPhase(long_name="goals"),
    # TODO: we need to confirm whether one can provide
    # -P or -pl multiple times and the values will be aggregate into a list of string
    # The current implementation only consider one instance of -P or -pl.
    # Where to begin:
    # https://github.com/apache/maven/blob/maven-3.9.x/maven-embedder/src/main/java/org/apache/maven/cli/CLIManager.java
    # https://github.com/apache/commons-cli/blob/master/src/main/java/org/apache/commons/cli/Parser.java
    MavenSingleValue(short_name="-b", long_name="--builder"),
    MavenSystemPropeties(short_name="-D", long_name="--define"),
    MavenSingleValue(short_name="-emp", long_name="--encrypt-master-password"),
    MavenSingleValue(short_name="-ep", long_name="--encrypt-password"),
    MavenSingleValue(short_name="-f", long_name="--file"),
    MavenSingleValue(short_name="-gs", long_name="--global-settings"),
    MavenSingleValue(short_name="-gt", long_name="--global-toolchains"),
    MavenSingleValue(short_name="-l", long_name="--log-file"),
    MavenCommaDelimList(short_name="-P", long_name="--activate-profiles"),
    MavenCommaDelimList(short_name="-pl", long_name="--projects"),
    MavenSingleValue(short_name="-rf", long_name="--resume-from"),
    MavenSingleValue(short_name="-s", long_name="--settings"),
    MavenSingleValue(short_name="-t", long_name="--toolchains"),
    MavenSingleValue(short_name="-T", long_name="--threads"),
]
class MavenCLICommandParser:
    """A Maven CLI Command Parser."""

    ACCEPTABLE_EXECUTABLE = ["mvn", "mvnw"]

    def __init__(self) -> None:
        """Initialize the instance."""
        self.arg_parser = argparse.ArgumentParser(
            description="Parse Maven CLI command",
            prog="mvn",
            add_help=False,
            # https://docs.python.org/3/library/argparse.html#exit-on-error
            # Best effort of parsing the build command. Therefore, we don't want to exit on error.
            exit_on_error=False,
        )

        # A mapping between the long name to its option definition.
        self.option_defs: dict[str, OptionDef] = {}

        for opt_def in MAVEN_OPTION_DEF:
            opt_def.add_itself_to_arg_parser(self.arg_parser)

            self.option_defs[opt_def.long_name] = opt_def

        self.build_tool = PatchCommandBuildTool.MAVEN

    def is_build_tool(self, executable_path: str) -> bool:
        """Return True if ``executable_path`` ends with an accepted executable for this build tool.

        Parameters
        ----------
        executable_path: str
            The executable component of a CLI command.

        Returns
        -------
        bool
            True if the base name of the path is one of ``ACCEPTABLE_EXECUTABLE``.
        """
        return os.path.basename(executable_path) in MavenCLICommandParser.ACCEPTABLE_EXECUTABLE

    def validate_patch(self, patch: Mapping[str, MavenOptionPatchValueType | None]) -> bool:
        """Return True if the patch conforms to the expected format.

        Every key must be the long name of a known option. Every value must either be None
        or have the type expected by the option definition. The first violation is logged.
        """
        for patch_name, patch_value in patch.items():
            opt_def = self.option_defs.get(patch_name)
            if not opt_def:
                logger.error("Cannot find any option that matches %s", patch_name)
                return False

            # None is accepted for any option because it is used to remove the option.
            if patch_value is None:
                continue

            if not opt_def.is_valid_patch_option(patch_value):
                logger.error(
                    "The patch value %s of %s is not in the correct type. Expect %s.",
                    patch_value,
                    patch_name,
                    opt_def.get_patch_type_str(),
                )
                return False

        return True

    def parse(self, cmd_list: list[str]) -> "MavenCLICommand":
        """Parse the Maven CLI Command.

        Parameters
        ----------
        cmd_list: list[str]
            The Maven CLI Command as list of strings.

        Returns
        -------
        MavenCLICommand
            The MavenCLICommand instance.

        Raises
        ------
        CommandLineParseError
            If an error happens when parsing the Maven CLI Command.
        """
        if not cmd_list:
            raise CommandLineParseError("The provided cmd list is empty.")

        exe_path = cmd_list[0]
        options = cmd_list[1:]

        if os.path.basename(exe_path) not in MavenCLICommandParser.ACCEPTABLE_EXECUTABLE:
            raise CommandLineParseError(f"{exe_path} is not an acceptable mvn executable path.")

        # TODO: because our parser is not completed for all cases, should we be more relaxed and use
        # parse_unknown_options?
        try:
            parsed_opts = self.arg_parser.parse_args(options)
        except argparse.ArgumentError as error:
            raise CommandLineParseError(f"Failed to parse command {' '.join(options)}.") from error
        # Even though we have set `exit_on_error`, argparse still exits unexpectedly in some
        # cases. This has been confirmed to be a bug in the argparse library implementation.
        # https://github.com/python/cpython/issues/121018.
        # This is fixed in Python3.12, but not Python3.11
        except SystemExit as sys_exit_err:
            raise CommandLineParseError(f"Failed to parse the Maven CLI Options {' '.join(options)}.") from sys_exit_err

        # Handle cases where goal or plugin phase is not provided.
        if not parsed_opts.goals:
            # Allow cases such as:
            #   mvn --help
            #   mvn --version
            # Note that we don't allow mvn -V or mvn --show-version as this command will
            # fail for mvn
            if not parsed_opts.help_ and not parsed_opts.version:
                raise CommandLineParseError(f"No goal detected for {' '.join(options)}.")

        maven_cli_options = MavenCLIOptions.from_parsed_arg(parsed_opts)

        return MavenCLICommand(
            executable=exe_path,
            options=maven_cli_options,
        )

    def _patch_properties_mapping(
        self,
        original_props: dict[str, str],
        option_long_name: str,
        patch_value: MavenOptionPatchValueType,
    ) -> dict[str, str]:
        """Return ``original_props`` patched with ``patch_value`` for the ``--define`` option.

        Raises
        ------
        PatchBuildCommandError
            If ``option_long_name`` is not defined by a ``MavenSystemPropeties`` or the patch
            value is not valid for it.
        """
        define_opt_def = self.option_defs.get(option_long_name)
        if not define_opt_def or not isinstance(define_opt_def, MavenSystemPropeties):
            raise PatchBuildCommandError(f"{option_long_name} from the patch is not a --define option.")

        if not define_opt_def.is_valid_patch_option(patch_value):
            raise PatchBuildCommandError(f"Critical, incorrect runtime type for patch --define, value: {patch_value}.")

        return patch_mapping(
            original=original_props,
            patch=patch_value,
        )

    def apply_patch(
        self,
        cli_command: MavenCLICommand,
        options_patch: Mapping[str, MavenOptionPatchValueType | None],
    ) -> MavenCLICommand:
        """Patch the options of a Maven CLI command, while persisting the executable path.

        `options_patch` is a mapping with:

        - **Key**: the long name of a Maven CLI option as a string. For example: ``--define``, ``--settings``.
          For patching goals or plugin phases, use the key `goals` with value being a list of string.

        - **Value**: The value to patch. The type of this value depends on the type of option you want to
          patch.

        The types of patch values:

        - For optional flag (e.g ``-X/--debug``) it is boolean. True to set it and False to unset it.

        - For ``-D/--define`` ONLY, it will be a mapping between the system property name and its value.

        - For options that expects a comma delimited list of string (e.g. ``-P/--activate-profiles``
          and ``-pl/--projects``), a list of string is expected.

        - For other value option (e.g ``-s/--settings``), a string is expected.

        None can be provided to any type of option to remove it from the original build command.

        Parameters
        ----------
        cli_command : MavenCLICommand
            The original Maven command, as a ``MavenCLICommand`` object from ``MavenCLICommandParser.parse(...)``
        options_patch : Mapping[str, MavenOptionPatchValueType | None]
            The patch values.

        Returns
        -------
        MavenCLICommand
            The patched command as a new ``MavenCLICommand`` object.

        Raises
        ------
        PatchBuildCommandError
            If an error happens during the patching process.
        """
        return MavenCLICommand(
            executable=cli_command.executable,
            options=self.apply_option_patch(
                cli_command.options,
                patch=options_patch,
            ),
        )

    def apply_option_patch(
        self,
        maven_cli_options: MavenCLIOptions,
        patch: Mapping[str, MavenOptionPatchValueType | None],
    ) -> MavenCLIOptions:
        """Patch the Maven CLI Options and return a new copy.

        Parameters
        ----------
        maven_cli_options: MavenCLIOptions
            The Maven CLI Options to patch. This object is not modified.
        patch: Mapping[str, MavenOptionPatchValueType | None]
            A mapping between the long name of an option and its patch value.
            The value can be None to disable an option.

        Returns
        -------
        MavenCLIOptions
            The new patched maven cli options.

        Raises
        ------
        PatchBuildCommandError
            If an error happens during the patching process.
        """
        if not self.validate_patch(patch):
            raise PatchBuildCommandError("The patch is invalid.")

        # Copy the Maven CLI Options for patching
        new_maven_cli_options = deepcopy(maven_cli_options)

        for option_long_name, patch_value in patch.items():
            if option_long_name == "--help":
                # The attribute (and the argparse dest) of --help is ``help_``, with a TRAILING
                # underscore. Any other name would set an unused attribute and silently drop the patch.
                attr_name = "help_"
            else:
                # Get the attribute name of MavenCLIOption object.
                # They all follow the same rule of removing the prefix --
                # from option long name and replace all "-" with "_"
                attr_name = option_long_name.removeprefix("--").replace("-", "_")

            # Ensure that setting any option to None in the patch
            # will remove it from the build command.
            if patch_value is None:
                setattr(new_maven_cli_options, attr_name, patch_value)
                continue

            # Only for "-D/--define" we patch it differently.
            if option_long_name == "--define":
                new_maven_cli_options.define = self._patch_properties_mapping(
                    original_props=new_maven_cli_options.define or {},
                    option_long_name=option_long_name,
                    patch_value=patch_value,
                )
                continue

            setattr(new_maven_cli_options, attr_name, patch_value)

        return new_maven_cli_options
@dataclass
class UnparsedCLICommand:
    """This class represents a CLICommand that we don't support parsing.

    Therefore, it only stores the original command as is.
    """

    # The original command, one element per argument.
    original_cmds: list[str]

    def to_cmds(self) -> list[str]:
        """Return the CLI Command as a new list of strings.

        A copy is returned so that a caller modifying the result cannot alter
        the command stored in this object.
        """
        return list(self.original_cmds)
("original", "patch", "expected"), + [ + pytest.param( + {}, + {}, + {}, + ), + pytest.param( + {"boo": "foo", "bar": "far"}, + {}, + {"boo": "foo", "bar": "far"}, + ), + pytest.param( + {}, + {"boo": "foo", "bar": "far"}, + {"boo": "foo", "bar": "far"}, + ), + pytest.param( + {"boo": "foo", "bar": "far"}, + {"boo": "another_foo"}, + {"boo": "another_foo", "bar": "far"}, + ), + pytest.param( + {"boo": "foo", "bar": "far"}, + {"boo": "another_foo", "bar": None}, + {"boo": "another_foo"}, + id="Use None to remove a system property", + ), + ], +) +def test_patch_mapping( + original: Mapping[str, str], + patch: Mapping[str, str | None], + expected: Mapping[str, str], +) -> None: + """Test the patch mapping function.""" + assert ( + patch_mapping( + original=original, + patch=patch, + ) + == expected + ) diff --git a/tests/build_spec_generator/cli_command_parser/test_gradle_cli_command.py b/tests/build_spec_generator/cli_command_parser/test_gradle_cli_command.py new file mode 100644 index 000000000..80cd3a643 --- /dev/null +++ b/tests/build_spec_generator/cli_command_parser/test_gradle_cli_command.py @@ -0,0 +1,156 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
+ +"""This module contains tests for the gradle_cli_command module.""" + + +from typing import Any + +import pytest + +from macaron.build_spec_generator.cli_command_parser.gradle_cli_command import GradleCLIOptions +from macaron.build_spec_generator.cli_command_parser.gradle_cli_parser import GradleCLICommandParser + + +@pytest.mark.parametrize( + ("this", "that"), + [ + pytest.param( + "gradle", + "gradle", + id="test_equal_only_executable", + ), + pytest.param( + "gradlew -S clean build -x test", + "gradlew clean build -S -x test", + id="test_different_order_of_options", + ), + pytest.param( + "gradlew clean build -Pgnupg.skip -Pskip.signing", + "gradlew clean build -Pskip.signing -Pgnupg.skip ", + id="test_properties_equal_checking", + ), + pytest.param( + "gradlew clean build -Dorg.gradle.caching=true -PmyProperty=boo", + "gradlew clean build -Dorg.gradle.caching=true -PmyProperty=boo", + id="test_properties_with_values_equal_checking", + ), + pytest.param( + "gradlew clean build -x test -x boo", + "gradlew clean build -x test -x boo", + id="test_excluded_tasks", + ), + ], +) +def test_comparing_gradle_cli_command_equal( + gradle_cli_parser: GradleCLICommandParser, + this: str, + that: str, +) -> None: + """Test comparing two equal GradleCLICommand objects.""" + this_command = gradle_cli_parser.parse(this.split()) + that_command = gradle_cli_parser.parse(that.split()) + assert this_command == that_command + + +@pytest.mark.parametrize( + ("this", "that"), + [ + ("gradle clean build", "gradle clean"), + ("gradle", "gradlew"), + ("gradle clean build", "gradle clean build -PmyProperty=true"), + ("gradle clean build -Dorg.gradle.caching=true", "gradle clean build -Dorg.gradle.caching=false"), + ("gradle clean build -Dorg.gradle.caching=true", "gradle clean build -Dorg.gradle.caching"), + ("gradle clean build", "gradle clean build -c settings.gradle"), + ("gradle build", "gradle build -x test"), + # We persist the order which the task names are put into the excluded 
list. + # Therefore the order of the -x options is important. + ("gradle build -x test -x boo", "gradle build -x boo -x test"), + ("gradle build --no-build-cache", "gradle build --build-cache"), + ], +) +def test_comparing_gradle_cli_command_unequal( + gradle_cli_parser: GradleCLICommandParser, + this: str, + that: str, +) -> None: + """Test comparing two unequal GradleCLICommand objects.""" + this_command = gradle_cli_parser.parse(this.split()) + that_command = gradle_cli_parser.parse(that.split()) + assert not this_command == that_command + + +@pytest.mark.parametrize( + ("command", "that"), + [ + ( + "gradle clean build -x test --debug --stacktrace -Dorg.gradle.caching=true", + True, + ), + ( + "gradle clean build -x test --debug --stacktrace -Dorg.gradle.caching=true", + ["boo", "foo"], + ), + ( + "gradle clean build -x test --debug --stacktrace -Dorg.gradle.caching=true", + {"boo", "foo"}, + ), + ], +) +def test_comparing_gradle_cli_command_unequal_types( + gradle_cli_parser: GradleCLICommandParser, + command: str, + that: Any, +) -> None: + """Test comparing MavenCLICommand with another incompatible type oject.""" + this_command = gradle_cli_parser.parse(command.split()) + assert not this_command == that + + +@pytest.mark.parametrize( + ("command"), + [ + "gradle clean build -x test --debug --stacktrace -Dorg.gradle.caching=true", + "gradle", + "gradle --version", + "gradle -?", + "gradlew --build-cache --continue --no-scan", + "gradlew --build-cache --no-build-cache", + ], +) +def test_to_cmd_goals(gradle_cli_parser: GradleCLICommandParser, command: str) -> None: + """Test the to_cmd_goals method by print out the cmds and the parse it again.""" + gradle_cli_command = gradle_cli_parser.parse(command.split()) + + print_command_with_tasks = [gradle_cli_command.executable] + print_command_with_tasks.extend(gradle_cli_command.options.to_option_cmds()) + + gradle_cli_command_second = gradle_cli_parser.parse(print_command_with_tasks) + assert gradle_cli_command == 
gradle_cli_command_second + + +@pytest.mark.parametrize( + ("properties", "expected"), + [ + pytest.param( + ["org.gradle.caching.debug=false", "boo=foo"], + {"org.gradle.caching.debug": "false", "boo": "foo"}, + ), + pytest.param( + ["org.gradle.caching.debug=false", "org.gradle.caching.debug=true"], + {"org.gradle.caching.debug": "true"}, + id="test_overriding_behavior_from_input", + ), + pytest.param( + ["org.gradle.caching.debug=false", "boo"], + {"org.gradle.caching.debug": "false", "boo": ""}, + id="test_property_default_value", + ), + ], +) +def test_gradle_cli_option_parse_properties( + properties: list[str], + expected: dict[str, str], +) -> None: + """Test the GradleCLIOptions.parse_properties method.""" + assert GradleCLIOptions.parse_properties(properties) == expected diff --git a/tests/build_spec_generator/cli_command_parser/test_gradle_cli_parser.py b/tests/build_spec_generator/cli_command_parser/test_gradle_cli_parser.py new file mode 100644 index 000000000..094b74a55 --- /dev/null +++ b/tests/build_spec_generator/cli_command_parser/test_gradle_cli_parser.py @@ -0,0 +1,165 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""This module contains the tests for the Gradle CLI Parser.""" + +import pytest + +from macaron.build_spec_generator.cli_command_parser.gradle_cli_parser import GradleCLICommandParser +from macaron.errors import CommandLineParseError + + +@pytest.mark.parametrize( + ("command", "expected"), + [ + # Gradle doesn't raise error when you run it like this. + # This is because when you provide no option, it still runs a task called ":help" to + # print out the usage message. 
+ pytest.param( + "gradle", + {"tasks": []}, + id="can_run_gradle_without_any_option", + ), + pytest.param( + "gradle -?", + {"tasks": [], "help_": True}, + id="gradle_print_help_-?", + ), + pytest.param( + "gradle --help", + {"tasks": [], "help_": True}, + id="gradle_print_help_--help", + ), + pytest.param( + "gradle -h", + {"tasks": [], "help_": True}, + id="gradle_print_help_-h", + ), + pytest.param( + "gradle --version", + {"tasks": [], "version": True}, + id="gradle_print_version_long", + ), + pytest.param( + "gradle -v", + {"tasks": [], "version": True}, + id="gradle_print_version_short", + ), + pytest.param( + "gradle clean build", + {"tasks": ["clean", "build"]}, + id="gradle_tasks", + ), + pytest.param( + "gradlew clean build", + {"tasks": ["clean", "build"]}, + id="gradle_wrapper_tasks", + ), + pytest.param( + "gradle clean build --continue", + {"tasks": ["clean", "build"], "continue_": True}, + id="test_continue_flag_with_exception_in_attribute_name", + ), + # TODO: validate if the order of the options decide the final value of + # the negateable option. + # For example: `--build-cache --no-build-cache` is different from `--no-build-cache --build-cache` + pytest.param( + "gradle clean build --build-cache --no-build-cache", + {"tasks": ["clean", "build"], "build_cache": False}, + id="both_normal_and_negated_form_can_be_provided_final_false", + ), + pytest.param( + "gradle clean build --no-build-cache --build-cache", + {"tasks": ["clean", "build"], "build_cache": True}, + id="both_normal_and_negated_form_can_be_provided_final_true", + ), + # This doesn't well represent a real gradle CLI command. + # It's just for the purpose of unit testing. 
+ pytest.param( + "gradle clean build --continue --debug --rerun-tasks -s --console plain --build-cache", + { + "tasks": ["clean", "build"], + "continue_": True, + "debug": True, + "rerun_tasks": True, + "stacktrace": True, + "console": "plain", + "build_cache": True, + }, + id="combination_of_option_types", + ), + ], +) +def test_gradle_cli_command_parser_valid_input( + gradle_cli_parser: GradleCLICommandParser, + command: str, + expected: dict[str, str | None | bool | list[str]], +) -> None: + """Test the gradle cli parser on valid input.""" + parsed_res = gradle_cli_parser.parse(command.split()) + + all_attrs = vars(parsed_res.options).keys() + + for attribute in all_attrs: + if attribute in expected: + assert getattr(parsed_res.options, attribute) == expected[attribute] + else: + # Making sure that we are not enabling flags that are not part of the + # build command. + # We don't compare it to None because some options if not set, argparse + # will assign a different Falsy value depending on the option type. 
+ assert not getattr(parsed_res.options, attribute) + + +@pytest.mark.parametrize( + ("build_command", "expected"), + [ + pytest.param( + "gradle clean build --debug --stacktrace", + "gradle", + ), + pytest.param( + "./gradlew clean build --debug --stacktrace", + "./gradlew", + ), + pytest.param( + "./boo/gradlew clean build --debug --stacktrace", + "./boo/gradlew", + ), + ], +) +def test_gradle_cli_command_parser_executable( + gradle_cli_parser: GradleCLICommandParser, + build_command: str, + expected: str, +) -> None: + """Test the Gradle CLI command parser correctly persisting the executable string.""" + parse_res = gradle_cli_parser.parse(build_command.split()) + assert parse_res.executable == expected + + +@pytest.mark.parametrize( + ("build_command"), + [ + pytest.param("", id="An empty command"), + pytest.param( + "gradle --this-argument-should-never-exist-in-gradle", + id="unrecognized_optional_argument", + ), + pytest.param( + "gradle --this-argument-should-never-exist-in-gradle some-value", + id="unrecognized_value_option", + ), + pytest.param( + "./graaadddllewww clean build", + id="unrecognized_executable_path", + ), + ], +) +def test_gradle_cli_command_parser_invalid_input( + gradle_cli_parser: GradleCLICommandParser, + build_command: str, +) -> None: + """Test the Gradle CLI command parser on invalid input.""" + with pytest.raises(CommandLineParseError): + gradle_cli_parser.parse(build_command.split()) diff --git a/tests/build_spec_generator/cli_command_parser/test_maven_cli_command.py b/tests/build_spec_generator/cli_command_parser/test_maven_cli_command.py new file mode 100644 index 000000000..d0e681e57 --- /dev/null +++ b/tests/build_spec_generator/cli_command_parser/test_maven_cli_command.py @@ -0,0 +1,142 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
+ +"""This module contains tests for the maven_cli_command module.""" + +from typing import Any + +import pytest + +from macaron.build_spec_generator.cli_command_parser.maven_cli_command import MavenCLIOptions +from macaron.build_spec_generator.cli_command_parser.maven_cli_parser import MavenCLICommandParser + + +@pytest.mark.parametrize( + ("this", "that"), + [ + pytest.param( + "mvn clean package", + "mvn clean package", + id="totally_equal", + ), + pytest.param( + "mvn -X clean package -P project1,project2", + "mvn clean package -X -P project1,project2", + id="test_different_order_of_options", + ), + pytest.param( + "mvn clean package -Dmaven.skip.test=true", + "mvn clean package -Dmaven.skip.test", + id="test_default_value_for_system_property", + ), + ], +) +def test_comparing_maven_cli_command_equal( + maven_cli_parser: MavenCLICommandParser, + this: str, + that: str, +) -> None: + """Test comparing two equal MavenCLICommand objects.""" + this_command = maven_cli_parser.parse(this.split()) + that_command = maven_cli_parser.parse(that.split()) + assert this_command == that_command + + +@pytest.mark.parametrize( + ("this", "that"), + [ + ("mvn clean package", "mvn install"), + ("mvn clean package", "mvn clean package -X"), + ("mvn clean package", "mvn clean package -P project1,project2"), + ("mvn clean package", "mvn clean package -Dmaven.skip.test=true"), + ("mvn clean package", "mvn clean package --settings ./pom.xml"), + ("mvn clean package", "mvn package clean"), + ("mvn clean package", "mvnw clean package"), + ], +) +def test_comparing_maven_cli_command_unequal( + maven_cli_parser: MavenCLICommandParser, + this: str, + that: str, +) -> None: + """Test comparing two unequal MavenCLICommand objects.""" + this_command = maven_cli_parser.parse(this.split()) + that_command = maven_cli_parser.parse(that.split()) + assert not this_command == that_command + + +@pytest.mark.parametrize( + ("command", "that"), + [ + ( + "mvn clean package -P profile1,profile2 -T 2C 
-ntp -Dmaven.skip.test=true -Dboo=foo", + True, + ), + ( + "mvn clean package -P profile1,profile2 -T 2C -ntp -Dmaven.skip.test=true -Dboo=foo", + ["boo", "foo"], + ), + ( + "mvn clean package -P profile1,profile2 -T 2C -ntp -Dmaven.skip.test=true -Dboo=foo", + {"boo", "foo"}, + ), + ], +) +def test_comparing_maven_cli_command_unequal_types( + maven_cli_parser: MavenCLICommandParser, + command: str, + that: Any, +) -> None: + """Test comparing MavenCLICommand with another incompatible type oject.""" + this_command = maven_cli_parser.parse(command.split()) + assert not this_command == that + + +@pytest.mark.parametrize( + ("command"), + [ + "mvn clean package", + "mvn clean package -P profile1,profile2 -T 2C -ntp -Dmaven.skip.test=true -Dboo=foo", + "mvn -f fit/core-reference/pom.xml verify -Dit.test=RESTITCase -Dinvoker.streamLogs=true" + + " -Dmodernizer.skip=true -Drat.skip=true -Dcheckstyle.skip=true -Djacoco.skip=true", + "mvn -s ../.github/maven-settings.xml install -Pexamples,noRun", + "mvn clean package -Dmaven.test.skip", + ], +) +def test_to_cmd_goals(maven_cli_parser: MavenCLICommandParser, command: str) -> None: + """Test the to_cmd_goals method by print out the cmds and the parse it again.""" + maven_cli_command = maven_cli_parser.parse(command.split()) + + print_command_with_goals = [maven_cli_command.executable] + print_command_with_goals.extend(maven_cli_command.options.to_option_cmds()) + + maven_cli_command_second = maven_cli_parser.parse(print_command_with_goals) + assert maven_cli_command == maven_cli_command_second + + +@pytest.mark.parametrize( + ("properties", "expected"), + [ + pytest.param( + ["maven.skip.true=true", "boo=foo"], + {"maven.skip.true": "true", "boo": "foo"}, + ), + pytest.param( + ["maven.skip.true=true", "maven.skip.true=false", "maven.skip.true=true"], + {"maven.skip.true": "true"}, + id="test_overriding_behavior_from_input", + ), + pytest.param( + # For example one can specify mvn clean package -Dmaven.skip.true=true -Dboo 
+ ["maven.skip.true=true", "boo"], + {"maven.skip.true": "true", "boo": "true"}, + id="test_system_property_default_value", + ), + ], +) +def test_maven_cli_option_parse_system_properties( + properties: list[str], + expected: dict[str, str], +) -> None: + """Test the MavenCLIOptions.parse_system_properties method.""" + assert MavenCLIOptions.parse_system_properties(properties) == expected diff --git a/tests/build_spec_generator/cli_command_parser/test_maven_cli_parser.py b/tests/build_spec_generator/cli_command_parser/test_maven_cli_parser.py new file mode 100644 index 000000000..d219e1af2 --- /dev/null +++ b/tests/build_spec_generator/cli_command_parser/test_maven_cli_parser.py @@ -0,0 +1,197 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""This module contains the tests for maven cli parser.""" + + +import pytest + +from macaron.build_spec_generator.cli_command_parser.maven_cli_parser import ( + CommandLineParseError, + MavenCLICommandParser, +) + + +@pytest.mark.parametrize( + ("command", "expected"), + [ + pytest.param( + "mvn clean package", + {"goals": ["clean", "package"]}, + id="goal_only_no_option", + ), + # https://maven.apache.org/guides/introduction/introduction-to-the-lifecycle.html#Build_Lifecycle_Basics + pytest.param( + "mvn clean dependency:copy-dependencies package", + {"goals": ["clean", "dependency:copy-dependencies", "package"]}, + id="goal_and_phase_mix", + ), + pytest.param( + "mvn clean package -P profile1,profile2 -T 2C -ntp -Dmaven.skip.test=true -Dboo=foo", + { + "goals": ["clean", "package"], + # "-P" + "activate_profiles": ["profile1", "profile2"], + # "-T" + "threads": "2C", + # "-ntp" + "no_transfer_progress": True, + # "-D=" + "define": {"maven.skip.test": "true", "boo": "foo"}, + }, + id="test_combination_options", + ), + pytest.param( + "mvn clean package -Dmaven.skip.test=true 
-Dmaven.skip.test=false", + { + "goals": ["clean", "package"], + "define": {"maven.skip.test": "false"}, + }, + id="multiple_definition_of_the_same_property_override_each_other", + ), + pytest.param( + "mvn clean package -Dmaven.skip.test", + { + "goals": ["clean", "package"], + "define": {"maven.skip.test": "true"}, + }, + id="test_default_value_if_no_value_is_provided_for_a_property", + ), + # A modified version of + # https://github.com/apache/syncope/blob/9437c6c978ca8c03b5e5cccc40a5a352be1ecc52/.github/workflows/crosschecks.yml#L70 + pytest.param( + "mvn -f fit/core-reference/pom.xml verify -Dit.test=RESTITCase -Dinvoker.streamLogs=true " + "-Dmodernizer.skip=true -Drat.skip=true -Dcheckstyle.skip=true -Djacoco.skip=true", + { + "file": "fit/core-reference/pom.xml", + "goals": ["verify"], + "define": { + "it.test": "RESTITCase", + "invoker.streamLogs": "true", + "modernizer.skip": "true", + "rat.skip": "true", + "checkstyle.skip": "true", + "jacoco.skip": "true", + }, + }, + id="pkg:maven/org.apache.syncope.common.keymaster.self/syncope-common-keymaster-client-self@3.0.0", + ), + # https://github.com/apache/activemq-artemis/blob/2.27.1/.github/workflows/build.yml + pytest.param( + "mvn -s ../.github/maven-settings.xml install -Pexamples,noRun", + { + "settings": "../.github/maven-settings.xml", + "goals": ["install"], + "activate_profiles": ["examples", "noRun"], + }, + id="pkg:maven/org.apache.activemq/artemis-log-annotation-processor@2.27.1", + ), + pytest.param( + "mvn --help", + { + "goals": [], + "help_": True, + }, + id="allow_no_goal_for_help", + ), + pytest.param( + "mvn --version", + { + "goals": [], + "help_": False, + "version": True, + }, + id="allow_no_goal_for_version", + ), + pytest.param( + "mvn --help --version", + { + "goals": [], + "help_": True, + "version": True, + }, + id="allow_no_goal_for_version_and_help", + ), + ], +) +def test_maven_cli_command_parser_valid_input( + maven_cli_parser: MavenCLICommandParser, + command: str, + expected: 
dict[str, str | None | bool | list[str]], +) -> None: + """Test the maven cli parser on valid input.""" + parsed_res = maven_cli_parser.parse(command.split()) + + all_attrs = vars(parsed_res.options).keys() + + for attribute in all_attrs: + if attribute in expected: + assert getattr(parsed_res.options, attribute) == expected[attribute] + else: + # Making sure that we are not enabling flags that are not part of the + # build command. + # We don't compare it to None because some options if not set, argparse + # will assign a different Falsy value depending on the option type. + # For example + # - If `--help` is not provide, its value will be False + # - If `--settings` is not provided, its value will be None. + assert not getattr(parsed_res.options, attribute) + + +@pytest.mark.parametrize( + ("build_command", "expected"), + [ + pytest.param( + "mvn clean package -X -ntp", + "mvn", + ), + pytest.param( + "mvnw clean package -X -ntp", + "mvnw", + ), + pytest.param( + "./boo/mvnw clean package -X -ntp", + "./boo/mvnw", + ), + ], +) +def test_maven_cli_command_parser_executable( + maven_cli_parser: MavenCLICommandParser, + build_command: str, + expected: str, +) -> None: + """Test the Maven CLI command correctly persisting the executable string.""" + parse_res = maven_cli_parser.parse(build_command.split()) + assert parse_res.executable == expected + + +@pytest.mark.parametrize( + ("build_command"), + [ + pytest.param("", id="An empty command"), + pytest.param("mvn", id="No goal or phase"), + pytest.param( + "mvn --this-argument-should-never-exist-in-mvn", + id="unrecognized_optional_argument", + ), + pytest.param( + "mvn --this-argument-should-never-exist-in-mvn some-value", + id="unrecognized_value_option", + ), + pytest.param( + "mmmvvvnnn clean package", + id="unrecognized_executable_path", + ), + pytest.param( + "mvn --show-version", + id="show_version_with_no_goal", + ), + ], +) +def test_maven_cli_command_parser_invalid_input( + maven_cli_parser: 
MavenCLICommandParser, + build_command: str, +) -> None: + """Test the Maven CLI command parser on invalid input.""" + with pytest.raises(CommandLineParseError): + maven_cli_parser.parse(build_command.split()) diff --git a/tests/conftest.py b/tests/conftest.py index 7e97461d0..d4ed2ab1b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -11,6 +11,8 @@ from pytest_httpserver import HTTPServer import macaron +from macaron.build_spec_generator.cli_command_parser.gradle_cli_parser import GradleCLICommandParser +from macaron.build_spec_generator.cli_command_parser.maven_cli_parser import MavenCLICommandParser from macaron.code_analyzer.call_graph import BaseNode, CallGraph from macaron.config.defaults import create_defaults, defaults, load_defaults from macaron.database.table_definitions import Analysis, Component, RepoFinderMetadata, Repository @@ -491,3 +493,15 @@ def deps_dev_service_mock_(httpserver: HTTPServer, tmp_path: Path) -> dict: "base_scheme": base_url_parsed.scheme, "base_netloc": base_url_parsed.netloc, } + + +@pytest.fixture(scope="module") +def maven_cli_parser() -> MavenCLICommandParser: + """Return a MvnCLICommandParser instance with a module scope.""" + return MavenCLICommandParser() + + +@pytest.fixture(scope="module") +def gradle_cli_parser() -> GradleCLICommandParser: + """Return a GradleCLICommandParser instance with a module scope.""" + return GradleCLICommandParser() From 8203dae88e167fa04bb10d516716827d50e2f5fe Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Wed, 9 Jul 2025 22:13:47 +1000 Subject: [PATCH 03/14] feat: add jdk version finder from maven central java artifacts Signed-off-by: Trong Nhan Mai --- .../build_spec_generator/jdk_finder.py | 340 ++++++++++++++++++ .../test_jdk_version_finder.py | 102 ++++++ 2 files changed, 442 insertions(+) create mode 100644 src/macaron/build_spec_generator/jdk_finder.py create mode 100644 tests/build_spec_generator/test_jdk_version_finder.py diff --git 
# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""This module includes the functions for obtaining JDK version from a Java artifact."""

import contextlib
import logging
import os
import posixpath
import tempfile
import urllib.parse
import zipfile
from enum import Enum

import requests

from macaron.artifact.maven import construct_maven_repository_path
from macaron.config.global_config import global_config
from macaron.errors import InvalidHTTPResponseError

logger: logging.Logger = logging.getLogger(__name__)


class JavaArtifactExt(str, Enum):
    """The extensions for Java artifacts."""

    JAR = ".jar"


def download_file(url: str, dest: str) -> None:
    """Stream a file into a local destination.

    The HTTP response is always closed before this function returns or raises.
    If streaming fails midway, the partially written ``dest`` file is removed so
    that callers never observe a truncated artifact.

    Parameters
    ----------
    url: str
        The URL of the file to stream from.
    dest: str
        The path to the destination file in the local file system. This path
        includes the file name.

    Raises
    ------
    InvalidHTTPResponseError
        If the server doesn't answer with status code 200, or if an error happens
        while streaming the file.
    OSError
        If ``dest`` cannot be opened for writing (e.g. its parent directory doesn't exist).
        A failure to send the initial request surfaces as ``requests.RequestException``,
        which is itself a subclass of ``OSError``.
    """
    response = requests.get(url=url, stream=True, timeout=40)
    try:
        if response.status_code != 200:
            raise InvalidHTTPResponseError(f"Cannot download java artifact file from {url}")

        try:
            with open(dest, "wb") as fd:
                # 8 KiB chunks keep memory usage flat without paying the per-chunk
                # overhead of very small reads.
                for chunk in response.iter_content(chunk_size=8192, decode_unicode=False):
                    fd.write(chunk)
        except requests.RequestException as error:
            # The file is closed at this point. Remove the truncated content so that
            # it is never mistaken for a complete artifact (e.g. by a cache lookup).
            with contextlib.suppress(OSError):
                os.remove(dest)
            raise InvalidHTTPResponseError(f"Error while streaming java artifact file from {url}") from error
    finally:
        # With ``stream=True`` the connection is only released once the response is
        # fully consumed or explicitly closed.
        response.close()


def join_remote_maven_repo_url(
    remote_maven_url: str,
    maven_repo_path: str,
) -> str:
    """Join the base remote maven URL with a maven repository path.

    Any params, query or fragment component of ``remote_maven_url`` is dropped.

    Parameters
    ----------
    remote_maven_url: str
        The url to a remote maven layout repository.
        For example: https://repo1.maven.org/maven2
    maven_repo_path: str
        The maven repository path for a GAV coordinate or an artifact
        from the root of the remote maven layout repository.
        Leading slashes are ignored, the path is always treated as relative to
        ``remote_maven_url``.

    Returns
    -------
    str
        The joined URL.

    Examples
    --------
    >>> remote_maven_repo = "https://repo1.maven.org/maven2"
    >>> artifact_path = "io/liftwizard/liftwizard-checkstyle/2.1.22/liftwizard-checkstyle-2.1.22.jar"
    >>> join_remote_maven_repo_url(remote_maven_repo, artifact_path)
    'https://repo1.maven.org/maven2/io/liftwizard/liftwizard-checkstyle/2.1.22/liftwizard-checkstyle-2.1.22.jar'
    >>> join_remote_maven_repo_url(remote_maven_repo, "io/liftwizard/liftwizard-checkstyle/2.1.22/")
    'https://repo1.maven.org/maven2/io/liftwizard/liftwizard-checkstyle/2.1.22/'
    >>> join_remote_maven_repo_url(f"{remote_maven_repo}/", artifact_path)
    'https://repo1.maven.org/maven2/io/liftwizard/liftwizard-checkstyle/2.1.22/liftwizard-checkstyle-2.1.22.jar'
    >>> join_remote_maven_repo_url(remote_maven_repo, "/io/liftwizard/liftwizard-checkstyle/2.1.22/")
    'https://repo1.maven.org/maven2/io/liftwizard/liftwizard-checkstyle/2.1.22/'
    """
    url_parse_result = urllib.parse.urlparse(remote_maven_url)
    # URL paths always use forward slashes, so join with posixpath rather than the
    # platform dependent os.path. A leading slash is stripped because an absolute
    # second component would make the join discard the repository base path.
    new_path_component = posixpath.join(
        url_parse_result.path,
        maven_repo_path.lstrip("/"),
    )
    return urllib.parse.urlunparse(
        urllib.parse.ParseResult(
            scheme=url_parse_result.scheme,
            netloc=url_parse_result.netloc,
            path=new_path_component,
            params="",
            query="",
            fragment="",
        )
    )


def get_jdk_version_from_jar(artifact_path: str) -> str | None:
    """Return the JDK version obtained from a Java artifact.

    The version is read from the first ``META-INF/MANIFEST.MF`` header whose name
    starts with ``Build-Jdk`` (this covers both ``Build-Jdk`` and ``Build-Jdk-Spec``).

    Parameters
    ----------
    artifact_path: str
        The path to the artifact to extract the jdk version.

    Returns
    -------
    str | None
        The version string extract from the artifact (as is) or None
        if there is an error, or if we couldn't find any jdk version.
    """
    manifest_path = "META-INF/MANIFEST.MF"
    try:
        with zipfile.ZipFile(artifact_path, "r") as jar, jar.open(manifest_path) as manifest_file:
            manifest_content = manifest_file.read().decode("utf-8")
    except (OSError, KeyError, zipfile.BadZipFile, UnicodeDecodeError) as error:
        # OSError: the artifact cannot be read; BadZipFile: it is not a valid archive;
        # KeyError: the archive has no manifest; UnicodeDecodeError: the manifest is not UTF-8.
        logger.debug("Cannot read the manifest of java artifact at %s. Error: %s", artifact_path, error)
        return None

    for line in manifest_content.splitlines():
        # Split on the first colon only, so a value containing a colon is kept whole
        # and a line without any colon is never mistaken for a header.
        name, separator, value = line.partition(":")
        if separator and name.strip().startswith("Build-Jdk"):
            version = value.strip()
            logger.debug(
                "Found JDK version %s from java artifact at %s",
                version,
                artifact_path,
            )
            return version

    logger.debug("Cannot find any JDK version from java artifact at %s", artifact_path)
    return None


def find_jdk_version_from_remote_maven_repo_standalone(
    group_id: str,
    artifact_id: str,
    version: str,
    asset_name: str,
    remote_maven_repo_url: str,
) -> str | None:
    """Return the jdk version string from an artifact matching a given GAV from a remote maven layout repository.

    This function doesn't cache the downloaded artifact, and removes it before the function exits.
    We assume that the remote maven layout repository supports downloading a file through a HTTPS URL.

    Parameters
    ----------
    group_id: str
        The group ID part of the GAV coordinate.
    artifact_id: str
        The artifact ID part of the GAV coordinate.
    version: str
        The version part of the GAV coordinate.
    asset_name: str
        The name of artifact to download and extract the jdk version.
    remote_maven_repo_url: str
        The URL to the remote maven layout repository.

    Returns
    -------
    str | None
        The version string extract from the artifact (as is) or None
        if there is an error, or if we couldn't find any jdk version.
    """
    maven_repository_path = construct_maven_repository_path(
        group_id=group_id,
        artifact_id=artifact_id,
        version=version,
        asset_name=asset_name,
    )

    artifact_url = join_remote_maven_repo_url(
        remote_maven_repo_url,
        maven_repository_path,
    )
    logger.debug(
        "Find JDK version from jar at %s, using temporary file.",
        artifact_url,
    )
    # The temporary directory (and the downloaded artifact in it) is deleted as soon
    # as the ``with`` block exits, so the manifest must be read inside it.
    with tempfile.TemporaryDirectory() as temp_dir_name:
        local_artifact_path = os.path.join(temp_dir_name, asset_name)
        try:
            download_file(
                artifact_url,
                local_artifact_path,
            )
        except InvalidHTTPResponseError as error:
            logger.error("Failed why trying to download jar file. Error: %s", error)
            return None
        except OSError as os_error:
            logger.critical("Critical %s", os_error)
            return None

        return get_jdk_version_from_jar(local_artifact_path)


def find_jdk_version_from_remote_maven_repo_cache(
    group_id: str,
    artifact_id: str,
    version: str,
    asset_name: str,
    remote_maven_repo_url: str,
    local_cache_repo: str,
) -> str | None:
    """Return the jdk version string from an artifact matching a given GAV from a remote maven layout repository.

    This function caches the downloaded artifact in a maven layout https://maven.apache.org/repository/layout.html
    under ``local_cache_repo``. If the artifact already exists in the cache, no download happens.
    We assume that the remote maven layout repository supports downloading a file through a HTTPS URL.

    Parameters
    ----------
    group_id: str
        The group ID part of the GAV coordinate.
    artifact_id: str
        The artifact ID part of the GAV coordinate.
    version: str
        The version part of the GAV coordinate.
    asset_name: str
        The name of artifact to download and extract the jdk version.
    remote_maven_repo_url: str
        The URL to the remote maven layout repository.
    local_cache_repo: str
        The path to a local directory for caching the downloaded artifact used in JDK version
        extraction.

    Returns
    -------
    str | None
        The version string extract from the artifact (as is) or None
        if there is an error, or if we couldn't find any jdk version.
    """
    maven_repository_path = construct_maven_repository_path(
        group_id=group_id,
        artifact_id=artifact_id,
        version=version,
        asset_name=asset_name,
    )

    local_artifact_path = os.path.join(
        local_cache_repo,
        maven_repository_path,
    )
    # Cache hit: reuse the artifact downloaded by a previous call.
    if os.path.isfile(local_artifact_path):
        return get_jdk_version_from_jar(local_artifact_path)

    artifact_url = join_remote_maven_repo_url(
        remote_maven_repo_url,
        maven_repository_path,
    )
    logger.debug(
        "Find JDK version from jar at %s, using cache %s",
        artifact_url,
        local_artifact_path,
    )
    try:
        # Creating the GAV directory is part of the guarded section so that a file
        # system error (e.g. missing permission) results in None instead of escaping.
        os.makedirs(
            os.path.dirname(local_artifact_path),
            exist_ok=True,
        )
        download_file(
            artifact_url,
            local_artifact_path,
        )
    except InvalidHTTPResponseError as error:
        logger.error("Failed why trying to download jar file. Error: %s", error)
        return None
    except OSError as os_error:
        logger.critical("Critical %s", os_error)
        return None

    return get_jdk_version_from_jar(local_artifact_path)


def find_jdk_version_from_central_maven_repo(
    group_id: str,
    artifact_id: str,
    version: str,
    use_cache: bool = True,
) -> str | None:
    """Return the jdk version string from an artifact matching a given GAV from Maven Central repository.

    The artifacts will be downloaded from https://repo1.maven.org/maven2/ for JDK version extraction.

    Only JAR files are supported at the moment: the asset looked up is
    ``<artifact_id>-<version>.jar``.

    Parameters
    ----------
    group_id: str
        The group ID part of the GAV coordinate.
    artifact_id: str
        The artifact ID part of the GAV coordinate.
    version: str
        The version part of the GAV coordinate.
    use_cache: bool
        If True (the default), the downloaded artifact is kept in the
        ``jdk_finding_cache_maven_repo`` directory under the global output path and is
        reused by later calls. If False, the artifact is downloaded to a temporary
        directory which is removed afterwards.

    Returns
    -------
    str | None
        The version string extract from the artifact (as is) or None
        if there is an error, or if we couldn't find any jdk version.
    """
    central_repo_url = "https://repo1.maven.org/maven2/"
    local_cache_maven_repo = os.path.join(
        global_config.output_path,
        "jdk_finding_cache_maven_repo",
    )
    asset_name = f"{artifact_id}-{version}{JavaArtifactExt.JAR.value}"

    if use_cache:
        return find_jdk_version_from_remote_maven_repo_cache(
            group_id=group_id,
            artifact_id=artifact_id,
            version=version,
            asset_name=asset_name,
            remote_maven_repo_url=central_repo_url,
            local_cache_repo=local_cache_maven_repo,
        )

    return find_jdk_version_from_remote_maven_repo_standalone(
        group_id=group_id,
        artifact_id=artifact_id,
        version=version,
        asset_name=asset_name,
        remote_maven_repo_url=central_repo_url,
    )
@pytest.mark.parametrize(
    ("remote_maven_url", "maven_repo_path", "expected"),
    [
        pytest.param(
            "https://repo1.maven.org/maven2",
            "com/oracle/",
            "https://repo1.maven.org/maven2/com/oracle/",
            id="g_coordinate",
        ),
        pytest.param(
            "https://repo1.maven.org/maven2",
            "com/oracle/macaron/",
            "https://repo1.maven.org/maven2/com/oracle/macaron/",
            id="ga_coordinate",
        ),
        pytest.param(
            "https://repo1.maven.org/maven2",
            "com/oracle/macaron/0.16.0/",
            "https://repo1.maven.org/maven2/com/oracle/macaron/0.16.0/",
            id="gav_coordinate",
        ),
        pytest.param(
            "https://repo1.maven.org/maven2",
            "com/oracle/macaron/0.16.0/macaron-0.16.0.jar",
            "https://repo1.maven.org/maven2/com/oracle/macaron/0.16.0/macaron-0.16.0.jar",
            id="gav_asset_coordinate",
        ),
        pytest.param(
            "https://repo1.maven.org/maven2/",
            "com/oracle/macaron/0.16.0/",
            "https://repo1.maven.org/maven2/com/oracle/macaron/0.16.0/",
            id="handle_trailing_slash_in_remote_maven_url",
        ),
    ],
)
def test_join_remote_maven_repo_url(
    remote_maven_url: str,
    maven_repo_path: str,
    expected: str,
) -> None:
    """Check that a repository URL and a maven layout path are joined into the expected URL."""
    joined_url = join_remote_maven_repo_url(
        remote_maven_url=remote_maven_url,
        maven_repo_path=maven_repo_path,
    )
    assert joined_url == expected


@pytest.mark.parametrize(
    ("manifest_mf_content", "expected"),
    [
        ("Build-Jdk: 1.8", "1.8"),
        ("Build-Jdk-Spec: 8", "8"),
    ],
)
def test_get_jdk_version_from_jar_succeed(
    tmp_path: Path,
    manifest_mf_content: str,
    expected: str,
) -> None:
    """Check that the JDK version is read from the manifest of a well-formed jar."""
    jar_path = tmp_path / "example.jar"

    # Build a minimal jar that only contains the manifest under test.
    with zipfile.ZipFile(jar_path, mode="w") as archive:
        archive.writestr("META-INF/MANIFEST.MF", manifest_mf_content)

    found_version = get_jdk_version_from_jar(str(jar_path))
    assert found_version == expected


@pytest.mark.parametrize(
    ("manifest_mf_content"),
    [
        (""),
        ("Build-Jdk-Spec: "),
    ],
)
def test_get_jdk_version_from_jar_failed(
    tmp_path: Path,
    manifest_mf_content: str,
) -> None:
    """Check that no JDK version is reported when the manifest does not provide one."""
    jar_path = tmp_path / "example.jar"

    # Build a minimal jar that only contains the manifest under test.
    with zipfile.ZipFile(jar_path, mode="w") as archive:
        archive.writestr("META-INF/MANIFEST.MF", manifest_mf_content)

    found_version = get_jdk_version_from_jar(str(jar_path))
    assert not found_version
logger: logging.Logger = logging.getLogger(__name__)

MVN_CLI_PARSER = MavenCLICommandParser()
GRADLE_CLI_PARSER = GradleCLICommandParser()

PatchValueType = GradleOptionPatchValueType | MavenOptionPatchValueType


def _patch_commands(
    cmds_sequence: Sequence[list[str]],
    cli_parsers: Sequence[CLICommandParser],
    patches: Mapping[
        PatchCommandBuildTool,
        Mapping[str, PatchValueType | None],
    ],
) -> list[CLICommand] | None:
    """Patch the sequence of build commands, using the provided CLICommandParser instances.

    For each command in `cmds_sequence`, it will be checked against all CLICommandParser instances until there is
    one that can parse it, then a patch from ``patches`` is applied for this command if provided.

    If a command doesn't have any corresponding ``CLICommandParser`` instance it will be parsed as UnparsedCLICommand,
    which just holds the original command as a list of string, without any changes. An empty command is handled
    the same way because it has no executable name to match against a build tool.

    Parameters
    ----------
    cmds_sequence: Sequence[list[str]]
        The commands to patch, each one as a list of its command line tokens.
    cli_parsers: Sequence[CLICommandParser]
        The parsers to try, in order; the first one that recognizes the executable of a command is used.
    patches: Mapping[PatchCommandBuildTool, Mapping[str, PatchValueType | None]]
        The patch to apply for each build tool. Commands of a build tool without a (non-empty) patch
        are parsed but not modified.

    Returns
    -------
    list[CLICommand] | None
        The resulting commands, in the same order as ``cmds_sequence``, or None if a recognized
        command cannot be parsed or a patch cannot be applied.
    """
    result: list[CLICommand] = []
    for cmds in cmds_sequence:
        effective_cli_parser: CLICommandParser | None = None
        # Guard against an empty command: there is no executable name (cmds[0]) to look at.
        if cmds:
            effective_cli_parser = next(
                (cli_parser for cli_parser in cli_parsers if cli_parser.is_build_tool(cmds[0])),
                None,
            )

        if effective_cli_parser is None:
            result.append(UnparsedCLICommand(original_cmds=cmds))
            continue

        try:
            cli_command = effective_cli_parser.parse(cmds)
        except CommandLineParseError as error:
            # The command can belong to any supported build tool, not only Maven.
            logger.error(
                "Failed to parse the build command %s. Error %s.",
                " ".join(cmds),
                error,
            )
            return None

        patch = patches.get(effective_cli_parser.build_tool, None)
        if not patch:
            # No patch (or an empty one) for this build tool: keep the parsed command as is.
            result.append(cli_command)
            continue

        try:
            new_cli_command = effective_cli_parser.apply_patch(
                cli_command=cli_command,
                options_patch=patch,
            )
        except PatchBuildCommandError as error:
            logger.error(
                "Failed to patch the build command %s. Error %s.",
                " ".join(cmds),
                error,
            )
            return None

        result.append(new_cli_command)

    return result


def patch_commands(
    cmds_sequence: Sequence[list[str]],
    patches: Mapping[
        PatchCommandBuildTool,
        Mapping[str, PatchValueType | None],
    ],
) -> list[list[str]] | None:
    """Patch a sequence of CLI commands.

    For each command in this command sequence:

    - If the command is not a build command or the build tool is not supported by us, it will be left intact.

    - If the command is a build command supported by us, it will be patched if a patch value is provided in
      ``patches``. If no patch value is provided for a build command, it will be left intact.

    `patches` is a mapping with:

    - **Key**: an instance of the ``PatchCommandBuildTool`` enum

    - **Value**: the patch value provided to ``CLICommandParser.apply_patch``. For more information on the patch value
      see the concrete implementations of the ``CLICommandParser.apply_patch`` method.
      For example: :class:`macaron.cli_command_parser.maven_cli_parser.MavenCLICommandParser.apply_patch`,
      :class:`macaron.cli_command_parser.gradle_cli_parser.GradleCLICommandParser.apply_patch`.

    This means that all commands that match a build tool will be patched with the same patch value.

    Parameters
    ----------
    cmds_sequence: Sequence[list[str]]
        The commands to patch, each one as a list of its command line tokens.
    patches: Mapping[PatchCommandBuildTool, Mapping[str, PatchValueType | None]]
        The patch to apply for each build tool.

    Returns
    -------
    list[list[str]] | None
        The patched command sequence or None if there is an error. An error happens if any command
        which we support is invalid in ``cmds_sequence``, or if the patch value is invalid.
    """
    patch_cli_commands = _patch_commands(
        cmds_sequence=cmds_sequence,
        cli_parsers=[MVN_CLI_PARSER, GRADLE_CLI_PARSER],
        patches=patches,
    )

    if patch_cli_commands is None:
        return None

    return [patch_cmd.to_cmds() for patch_cmd in patch_cli_commands]
@pytest.mark.parametrize(
    ("original", "patch_options", "expected"),
    [
        pytest.param(
            "mvn install -X",
            {},
            "mvn install -X",
            id="no_patch_value",
        ),
        pytest.param(
            "mvn install -X",
            {"goals": ["clean", "package"]},
            "mvn clean package -X",
            id="patch_goals_should_persist_order",
        ),
        pytest.param(
            "mvn install",
            {
                "--no-transfer-progress": True,
            },
            "mvn install -ntp",
            id="patching_an_optional_flag",
        ),
        pytest.param(
            "mvn install",
            {
                "--threads": "2C",
            },
            "mvn install -T 2C",
            id="patching_single_value_option",
        ),
        pytest.param(
            "mvn install",
            {
                "--activate-profiles": ["profile1", "profile2"],
            },
            "mvn install -P profile1,profile2",
            id="patching_comma_delimt_list_value_option",
        ),
        pytest.param(
            "mvn install",
            {
                "--define": {
                    "maven.skip.test": "true",
                    "rat.skip": "true",
                },
            },
            "mvn install -Dmaven.skip.test=true -Drat.skip=true",
            id="patching_system_properties",
        ),
        # The patch for -D/--define is merged with the original system properties.
        # The patch always takes precedence.
        pytest.param(
            "mvn install -Dmaven.skip.test=false -Dboo=foo",
            {
                "goals": ["clean", "package"],
                "--define": {
                    "maven.skip.test": "true",
                    "rat.skip": "true",
                },
            },
            "mvn clean package -Dmaven.skip.test=true -Drat.skip=true -Dboo=foo",
            id="patching_system_properties_merging",
        ),
        pytest.param(
            "mvn install -Dmaven.skip.test=false -Dboo=foo",
            {
                "goals": ["clean", "package"],
                "--define": {
                    "maven.skip.test": None,
                    "rat.skip": "true",
                },
            },
            "mvn clean package -Drat.skip=true -Dboo=foo",
            id="patching_system_properties_disable",
        ),
        pytest.param(
            "mvn install -T 2C -ntp -Dmaven.skip.test=true",
            {
                "--threads": None,
                "--no-transfer-progress": None,
                "--define": None,
            },
            "mvn install",
            id="removing_any_option_using_None",
        ),
    ],
)
def test_patch_mvn_cli_command(
    maven_cli_parser: MavenCLICommandParser,
    original: str,
    patch_options: Mapping[str, MavenOptionPatchValueType | None],
    expected: str,
) -> None:
    """Test the patch maven cli command on valid input."""
    patch_cmds = _patch_commands(
        cmds_sequence=[original.split()],
        cli_parsers=[maven_cli_parser],
        patches={PatchCommandBuildTool.MAVEN: patch_options},
    )
    assert patch_cmds
    assert len(patch_cmds) == 1

    # Compare parsed commands rather than raw strings so that option ordering does not matter.
    patch_mvn_cli_command = maven_cli_parser.parse(patch_cmds.pop().to_cmds())
    expected_mvn_cli_command = maven_cli_parser.parse(expected.split())

    assert patch_mvn_cli_command == expected_mvn_cli_command


@pytest.mark.parametrize(
    ("invalid_patch"),
    [
        pytest.param(
            {
                "--this-option-should-never-exist": True,
            },
            id="unrecognised_option_name",
        ),
        pytest.param(
            {
                "--define": True,
            },
            id="incorrect_define_option_type",
        ),
        pytest.param(
            {
                "--debug": "some_value",
            },
            id="incorrect_debug_option_type",
        ),
        pytest.param(
            {
                "--settings": False,
            },
            id="incorrect_settings_option_type",
        ),
        pytest.param(
            {
                "--activate-profiles": False,
            },
            id="incorrect_activate_profiles_option_type",
        ),
    ],
)
def test_patch_mvn_cli_command_error(
    maven_cli_parser: MavenCLICommandParser,
    invalid_patch: dict[str, MavenOptionPatchValueType | None],
) -> None:
    """Test patch mvn cli command patching with invalid patch."""
    cmd_list = "mvn -s ../.github/maven-settings.xml install -Pexamples,noRun".split()

    assert (
        _patch_commands(
            cmds_sequence=[cmd_list],
            cli_parsers=[maven_cli_parser],
            patches={
                PatchCommandBuildTool.MAVEN: invalid_patch,
            },
        )
        is None
    )


@pytest.mark.parametrize(
    ("original", "patch_options", "expected"),
    [
        pytest.param(
            "gradle --build-cache clean build",
            {},
            "gradle --build-cache clean build",
            id="no_patch_value",
        ),
        pytest.param(
            "gradle --build-cache clean build",
            {"--build-cache": False},
            "gradle --no-build-cache clean build",
            id="test_patching_negateable_option",
        ),
        pytest.param(
            "gradle clean",
            {"tasks": ["clean", "build"]},
            "gradle clean build",
            id="patch_tasks_should_persist_order",
        ),
        pytest.param(
            "gradle clean build",
            {"--debug": True},
            "gradle --debug clean build",
            id="patching_an_optional_flag",
        ),
        pytest.param(
            "gradle clean build",
            {
                "--debug": True,
                "--continue": True,
            },
            "gradle --debug --continue clean build",
            # This id used to duplicate the one of the previous case.
            id="patching_multiple_optional_flags",
        ),
        pytest.param(
            "gradle clean build",
            {"--console": "plain"},
            "gradle --console plain clean build",
            id="patching_a_single_value_option",
        ),
        pytest.param(
            "gradle clean build -Pboo=foo",
            {
                "--system-prop": {
                    "org.gradle.caching": "true",
                },
                "--project-prop": {
                    "bar": "",
                    "boo": "another_foo",
                },
            },
            "gradle clean build -Dorg.gradle.caching=true -Pbar -Pboo=another_foo",
            id="patching_properties",
        ),
        pytest.param(
            "gradle clean build -Pboo=foo",
            {
                "--project-prop": {
                    "boo": None,
                }
            },
            "gradle clean build",
            id="removing_a_property_using_none",
        ),
        pytest.param(
            "gradle clean build",
            {"--exclude-task": ["boo", "test"]},
            "gradle clean build -x boo -x test",
            id="excluding_tasks",
        ),
        pytest.param(
            "gradle clean build --debug -x test -Dorg.gradle.caching=true -Pboo=foo --console=plain --no-build-cache",
            {
                "--exclude-task": None,
                "--debug": None,
                "--system-prop": None,
                "--project-prop": None,
                "--console": None,
                "--build-cache": None,
            },
            "gradle clean build",
            id="removing_any_option_using_none",
        ),
    ],
)
def test_patch_gradle_cli_command(
    gradle_cli_parser: GradleCLICommandParser,
    original: str,
    patch_options: dict[str, GradleOptionPatchValueType | None],
    expected: str,
) -> None:
    """Test the patch gradle cli command on valid input."""
    patch_cmds = _patch_commands(
        cmds_sequence=[original.split()],
        cli_parsers=[gradle_cli_parser],
        patches={PatchCommandBuildTool.GRADLE: patch_options},
    )
    assert patch_cmds
    assert len(patch_cmds) == 1

    # Compare parsed commands rather than raw strings so that option ordering does not matter.
    patch_gradle_cli_command = gradle_cli_parser.parse(patch_cmds.pop().to_cmds())
    expected_gradle_cli_command = gradle_cli_parser.parse(expected.split())

    assert patch_gradle_cli_command == expected_gradle_cli_command


@pytest.mark.parametrize(
    ("invalid_patch"),
    [
        pytest.param(
            {
                "--this-option-should-never-exist": True,
            },
            id="unrecognised_option_name",
        ),
        pytest.param(
            {
                "--system-prop": True,
            },
            id="incorrect_system_prop_option_type",
        ),
        pytest.param(
            {
                "--project-prop": True,
            },
            id="incorrect_project_prop_option_type",
        ),
        pytest.param(
            {
                "--debug": "some_value",
            },
            id="incorrect_debug_option_type",
        ),
        pytest.param(
            {
                "--init-script": False,
            },
            id="incorrect_init_script_option_type",
        ),
        pytest.param(
            {
                "--exclude-task": False,
            },
            id="incorrect_exclude_task_option_type",
        ),
        pytest.param(
            {
                "tasks": False,
            },
            id="incorrect_tasks_type",
        ),
        pytest.param(
            {
                "--no-build-cache": True,
            },
            id="cannot_use_negated_form_option_as_key_in_patch",
        ),
    ],
)
def test_patch_gradle_cli_command_error(
    gradle_cli_parser: GradleCLICommandParser,
    invalid_patch: dict[str, GradleOptionPatchValueType | None],
) -> None:
    """Test patch gradle cli command patching with invalid patch."""
    cmd_list = "gradle clean build --no-build-cache --debug --console plain -Dorg.gradle.parallel=true".split()
    assert (
        _patch_commands(
            cmds_sequence=[cmd_list],
            cli_parsers=[gradle_cli_parser],
            patches={
                PatchCommandBuildTool.GRADLE: invalid_patch,
            },
        )
        is None
    )


@pytest.mark.parametrize(
    ("cmds_sequence", "patches", "expected"),
    [
        pytest.param(
            [
                "mvn clean package".split(),
                "gradle clean build".split(),
            ],
            {
                PatchCommandBuildTool.MAVEN: {
                    "--debug": True,
                },
                PatchCommandBuildTool.GRADLE: {
                    "--debug": True,
                },
            },
            [
                "mvn clean package --debug".split(),
                "gradle clean build --debug".split(),
            ],
            id="apply_multiple_types_of_patches",
        ),
        pytest.param(
            [
                "mvn clean package".split(),
                "gradle clean build".split(),
            ],
            {
                PatchCommandBuildTool.MAVEN: {
                    "--debug": True,
                },
            },
            [
                "mvn clean package --debug".split(),
                "gradle clean build".split(),
            ],
            id="apply_one_type_of_patch_to_multiple_commands",
        ),
        pytest.param(
            [
                "mvn clean package".split(),
                "gradle clean build".split(),
            ],
            {},
            [
                "mvn clean package".split(),
                "gradle clean build".split(),
            ],
            id="apply_no_patch_to_multiple_build_commands",
        ),
        pytest.param(
            [
                "make setup".split(),
                "mvn clean package".split(),
                "gradle clean build".split(),
                "make clean".split(),
            ],
            {
                PatchCommandBuildTool.MAVEN: {
                    "--debug": True,
                },
                PatchCommandBuildTool.GRADLE: {
                    "--debug": True,
                },
            },
            [
                "make setup".split(),
                "mvn clean package --debug".split(),
                "gradle clean build --debug".split(),
                "make clean".split(),
            ],
            id="command_that_we_cannot_parse_stay_the_same",
        ),
    ],
)
def test_patching_multiple_commands(
    maven_cli_parser: MavenCLICommandParser,
    gradle_cli_parser: GradleCLICommandParser,
    cmds_sequence: list[list[str]],
    patches: Mapping[
        PatchCommandBuildTool,
        Mapping[str, PatchValueType | None],
    ],
    expected: list[list[str]],
) -> None:
    """Test patching multiple commands."""
    patch_cli_commands = _patch_commands(
        cmds_sequence=cmds_sequence,
        cli_parsers=[maven_cli_parser, gradle_cli_parser],
        patches=patches,
    )

    assert patch_cli_commands

    # Build the expected CLICommand objects the same way the patcher selects a parser:
    # the first parser that recognizes the executable wins, otherwise the command is unparsed.
    expected_cli_commands: list[CLICommand] = []
    cli_parsers: list[CLICommandParser] = [maven_cli_parser, gradle_cli_parser]
    for cmd in expected:
        effective_cli_parser = next(
            (cli_parser for cli_parser in cli_parsers if cli_parser.is_build_tool(cmd[0])),
            None,
        )

        if effective_cli_parser:
            # Use the selected parser explicitly instead of relying on a leaked loop variable.
            expected_cli_commands.append(effective_cli_parser.parse(cmd))
        else:
            expected_cli_commands.append(
                UnparsedCLICommand(
                    original_cmds=cmd,
                )
            )

    assert patch_cli_commands == expected_cli_commands
+ "--debug": 10, + }, + }, + id="incorrrect_patch_value", + ), + ], +) +def test_patching_multiple_commands_error( + maven_cli_parser: MavenCLICommandParser, + gradle_cli_parser: GradleCLICommandParser, + cmds_sequence: list[list[str]], + patches: Mapping[ + PatchCommandBuildTool, + Mapping[str, PatchValueType | None], + ], +) -> None: + """Test error cases for patching multiple commands.""" + assert ( + _patch_commands( + cmds_sequence=cmds_sequence, + cli_parsers=[maven_cli_parser, gradle_cli_parser], + patches=patches, + ) + is None + ) From 226ac7971a7866f4af8e634f95b9c3a7a17a7c44 Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Wed, 9 Jul 2025 22:16:05 +1000 Subject: [PATCH 05/14] feat: add jdk version normalizer Signed-off-by: Trong Nhan Mai --- .../jdk_version_normalizer.py | 81 +++++++++++++++++++ .../test_jdk_version_normalizer.py | 53 ++++++++++++ 2 files changed, 134 insertions(+) create mode 100644 src/macaron/build_spec_generator/jdk_version_normalizer.py create mode 100644 tests/build_spec_generator/test_jdk_version_normalizer.py diff --git a/src/macaron/build_spec_generator/jdk_version_normalizer.py b/src/macaron/build_spec_generator/jdk_version_normalizer.py new file mode 100644 index 000000000..852aab9b2 --- /dev/null +++ b/src/macaron/build_spec_generator/jdk_version_normalizer.py @@ -0,0 +1,81 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""This module contains the logic to nomarlize a JDK version string to a major version number.""" + +SUPPORTED_JAVA_VERSION = [ + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", +] + + +def normalize_jdk_version(jdk_version_str: str) -> str | None: + """Return the major JDK version number. 
+ + We assume that the jdk version string is already valid (e.g not using a JDK + version that is not available in the real world. + + For 1.x versions, we returns the major version as ``x``. + + Parameters + ---------- + jdk_version_str: str + The jdk version string. + + Returns + ------- + str | None + The major jdk version number as string or None if there is an error. + + Examples + -------- + >>> normalize_jdk_version("19") + '19' + >>> normalize_jdk_version("19-ea") + '19' + >>> normalize_jdk_version("11.0.1") + '11' + >>> normalize_jdk_version("1.8") + '8' + >>> normalize_jdk_version("25.0.1") + """ + first, _, after = jdk_version_str.partition(".") + jdk_major_ver = None + if first == "1": + # Cases like 1.8.0_523 + # Or 1.8 + jdk_major_ver, _, _ = after.partition(".") + else: + # Cases like 11 or 11.0 or 11.0.1 + jdk_major_ver = first + + if jdk_major_ver in SUPPORTED_JAVA_VERSION: + return jdk_major_ver + + # Handle edge cases: + # pkg:maven/org.apache.druid.integration-tests/druid-it-cases@25.0.0 + # - "8 (Azul Systems Inc. 25.282-b08)" + # pkg:maven/io.helidon.reactive.media/helidon-reactive-media-jsonp@4.0.0-ALPHA1 + # - "19-ea" + for support in SUPPORTED_JAVA_VERSION: + # Wouldn't work for cases like 19000 but that's not a big problem + # as long as the result is a valid major version. + if jdk_major_ver.startswith(support): + return support + + return None diff --git a/tests/build_spec_generator/test_jdk_version_normalizer.py b/tests/build_spec_generator/test_jdk_version_normalizer.py new file mode 100644 index 000000000..2236505e8 --- /dev/null +++ b/tests/build_spec_generator/test_jdk_version_normalizer.py @@ -0,0 +1,53 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
@pytest.mark.parametrize(
    ("version_string", "expected"),
    [
        pytest.param(
            "1.8.0_523",
            "8",
            id="1.x_with_patch_version",
        ),
        pytest.param(
            "1.8",
            "8",
            id="1.x_without_patch_version",
        ),
        pytest.param(
            "11.0.1",
            "11",
            id="major_number_stands_first_with_patch_version",
        ),
        pytest.param(
            "11.0",
            "11",
            id="major_number_stands_first_without_patch_version",
        ),
        pytest.param(
            "11",
            "11",
            id="just_the_major_version",
        ),
        pytest.param(
            "8 (Azul Systems Inc. 25.282-b08)",
            "8",
            id="major_follows_with_text",
        ),
        pytest.param(
            "19-ea",
            "19",
            # This id used to duplicate the one of the previous case.
            id="major_follows_with_suffix",
        ),
    ],
)
def test_jdk_version_normalizer(version_string: str, expected: str) -> None:
    """Test that normalize_jdk_version returns the expected major version for valid version strings."""
    assert normalize_jdk_version(version_string) == expected
tests/build_spec_generator/reproducible_central/compare_rc_build_spec.py create mode 100644 tests/build_spec_generator/reproducible_central/test_reproducible_central.py diff --git a/src/macaron/build_spec_generator/build_spec_generator.py b/src/macaron/build_spec_generator/build_spec_generator.py new file mode 100644 index 000000000..0868f19a4 --- /dev/null +++ b/src/macaron/build_spec_generator/build_spec_generator.py @@ -0,0 +1,96 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""This module contains the functions used for generating build specs from the Macaron database.""" + +import logging +from collections.abc import Mapping +from enum import Enum + +from packageurl import PackageURL +from sqlalchemy import create_engine +from sqlalchemy.orm import Session + +from macaron.build_spec_generator.build_command_patcher import PatchCommandBuildTool, PatchValueType +from macaron.build_spec_generator.reproducible_central.reproducible_central import gen_reproducible_central_build_spec + +logger: logging.Logger = logging.getLogger(__name__) + + +class BuildSpecFormat(str, Enum): + """The build spec format that we supports.""" + + REPRODUCIBLE_CENTRAL = "rc-buildspec" + + +CLI_COMMAND_PATCHES: dict[ + PatchCommandBuildTool, + Mapping[str, PatchValueType | None], +] = { + PatchCommandBuildTool.MAVEN: { + "goals": ["clean", "package"], + "--batch-mode": False, + "--quiet": False, + "--no-transfer-progress": False, + # Example pkg:maven/io.liftwizard/liftwizard-servlet-logging-mdc@1.0.1 + # https://github.com/liftwizard/liftwizard/blob/ + # 4ea841ffc9335b22a28a7a19f9156e8ba5820027/.github/workflows/build-and-test.yml#L23 + "--threads": None, + # For cases such as + # pkg:maven/org.apache.isis.valuetypes/isis-valuetypes-prism-resources@2.0.0-M7 + "--version": False, + "--define": { + # 
pkg:maven/org.owasp/dependency-check-utils@7.3.2 + # To remove "-Dgpg.passphrase=$MACARON_UNKNOWN" + "gpg.passphrase": None, + "skipTests": "true", + "maven.test.skip": "true", + "maven.site.skip": "true", + "rat.skip": "true", + "maven.javadoc.skip": "true", + }, + }, + PatchCommandBuildTool.GRADLE: { + "tasks": ["clean", "assemble"], + "--console": "plain", + "--exclude-task": ["test"], + "--project-prop": { + "skip.signing": "", + "skipSigning": "", + "gnupg.skip": "", + }, + }, +} + + +def gen_build_spec_str( + purl: PackageURL, + database_path: str, + build_spec_format: BuildSpecFormat, +) -> str | None: + """Return the content of a build spec file from a given PURL. + + Parameters + ---------- + purl: PackageURL + The package URL to generate build spec for. + database_path: str + The path to the Macaron database. + build_spec_format: BuildSpecFormat + The format of the final build spec content. + + Returns + ------- + str | None + The build spec content as a string, or None if there is an error. + """ + db_engine = create_engine(f"sqlite+pysqlite:///{database_path}", echo=False) + + with Session(db_engine) as session, session.begin(): + match build_spec_format: + case BuildSpecFormat.REPRODUCIBLE_CENTRAL: + return gen_reproducible_central_build_spec( + purl=purl, + session=session, + patches=CLI_COMMAND_PATCHES, + ) diff --git a/src/macaron/build_spec_generator/reproducible_central/__init__.py b/src/macaron/build_spec_generator/reproducible_central/__init__.py new file mode 100644 index 000000000..8e17a3508 --- /dev/null +++ b/src/macaron/build_spec_generator/reproducible_central/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
def get_rc_internal_build_info(
    purl: PackageURL,
    session: Session,
) -> RcInternalBuildInfo | None:
    """Collect everything the Reproducible Central generator needs for a PackageURL.

    Four lookups are performed in order: the latest component id, the build tool
    facts, the repository, and the build command facts. A query error in any of
    them, or an empty result from any of the first three, is logged and aborts
    the collection.

    Parameters
    ----------
    purl: PackageURL
        The PackageURL to extract information about.
    session: Session
        The SQLAlchemy Session for the Macaron database.

    Returns
    -------
    RcInternalBuildInfo | None
        The populated ``RcInternalBuildInfo`` or None if there was an error.
    """
    # 1. The component analysed most recently for this PURL.
    try:
        component_id = lookup_latest_component_id(
            purl=purl,
            session=session,
        )
    except QueryMacaronDatabaseError as error:
        logger.error(
            "Unexpected result from querying latest component id for %s. Error: %s",
            purl.to_string(),
            error,
        )
        return None
    if not component_id:
        logger.error(
            "Cannot find an analysis result for PackageURL %s in the database. "
            "Please check if an analysis for it exists in the database.",
            purl.to_string(),
        )
        return None
    logger.debug("Latest component ID: %d", component_id)

    # 2. The build tools recorded for that component.
    try:
        tool_facts = lookup_build_tools_check(
            component_id=component_id,
            session=session,
        )
    except QueryMacaronDatabaseError as error:
        logger.error(
            "Unexpected result from querying build tools for %s. Error: %s",
            purl.to_string(),
            error,
        )
        return None
    if not tool_facts:
        logger.error(
            "Cannot find any build tool for PackageURL %s in the database.",
            purl.to_string(),
        )
        return None
    logger.debug("Build tools discovered from the %s table: %s", BuildToolFacts.__tablename__, tool_facts)

    # 3. The repository the component was analysed from.
    try:
        repository = lookup_repository(component_id, session)
    except QueryMacaronDatabaseError as error:
        logger.error(
            "Unexpected result from querying repository information for %s. Error: %s",
            purl.to_string(),
            error,
        )
        return None
    if not repository:
        logger.error(
            "Cannot find any repository information for %s in the database.",
            purl.to_string(),
        )
        return None

    # 4. Build commands are optional: an empty result is passed through as-is.
    try:
        command_facts = lookup_any_build_command(component_id, session)
    except QueryMacaronDatabaseError as error:
        logger.error(
            "Unexpected result from querying all build command information for %s. Error: %s",
            purl.to_string(),
            error,
        )
        return None

    return RcInternalBuildInfo(
        purl=purl,
        repository=repository,
        latest_component_id=component_id,
        build_tool_facts=tool_facts,
        generic_build_command_facts=command_facts,
    )
+ +"""This module contains the logic to generate a build spec in Reproducible Central format.""" + +import logging +import shlex +from collections.abc import Mapping, Sequence +from enum import Enum +from importlib import metadata as importlib_metadata +from pprint import pformat + +import sqlalchemy.orm +from packageurl import PackageURL + +from macaron.build_spec_generator.build_command_patcher import PatchCommandBuildTool, PatchValueType, patch_commands +from macaron.build_spec_generator.jdk_finder import find_jdk_version_from_central_maven_repo +from macaron.build_spec_generator.jdk_version_normalizer import normalize_jdk_version +from macaron.build_spec_generator.reproducible_central.rc_build_info import ( + RcInternalBuildInfo, + get_rc_internal_build_info, +) +from macaron.slsa_analyzer.checks.build_tool_check import BuildToolFacts + +logger: logging.Logger = logging.getLogger(__name__) + +# We use a subset of available config options from +# https://github.com/jvm-repo-rebuild/reproducible-central/blob/master/doc/BUILDSPEC.md +# An example: +# https://github.com/jvm-repo-rebuild/reproducible-central/blob/master/content/com/google/guava/guava-32.0.0-android.buildspec +# About this template +# - Because the Reproducible-Central build spec is a bash script by itself, we can use +# Bash comment syntax. +# - We only work with git repository and its commit hash. Therefore `gitRepo` and `gitTag` are used only. +# Even though it's called gitTag, a commit hash would work. +# https://github.com/jvm-repo-rebuild/reproducible-central/blob/46de9b405cb30ff94effe0ba47c1ebecc5a1c17e/bin/includes/fetchSource.sh#L59C1-L59C72 +STRING_TEMPLATE = """# Copyright (c) 2025, Oracle and/or its affiliates. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
def remove_shell_quote(cmd: list[str]) -> list[str]:
    """Strip shell quoting from the tokens of a command.

    The tokens are joined with single spaces and the resulting string is
    re-tokenised with ``shlex.split``, so quoted fragments lose their quotes.

    Parameters
    ----------
    cmd: list[str]
        The shell command as list of string.

    Returns
    -------
    list[str]
        The shell command with all quote removed.

    Examples
    --------
    >>> cmd = "mvn -f fit/core-reference/pom.xml verify '-Dit.test=RESTITCase' '-Dmodernizer.skip=true' '-Drat.skip=true'"
    >>> remove_shell_quote(cmd.split())
    ['mvn', '-f', 'fit/core-reference/pom.xml', 'verify', '-Dit.test=RESTITCase', '-Dmodernizer.skip=true', '-Drat.skip=true']
    """
    command_line = " ".join(cmd)
    unquoted_tokens = shlex.split(command_line)
    return unquoted_tokens
def _gen_reproducible_central_build_spec(
    build_info: RcInternalBuildInfo,
    patches: Mapping[
        PatchCommandBuildTool,
        Mapping[str, PatchValueType | None],
    ],
) -> str | None:
    """Return the RC's Buildspec content from a ``RcInternalBuildInfo`` instance.

    The function validates that ``build_info`` carries enough information (a PURL
    with namespace and version, and a supported build tool), then fills in
    ``STRING_TEMPLATE``.

    The build command and JDK version start from defaults (a patched default
    command for the build tool, JDK ``8``). When ``build_info`` has build command
    facts, the first one replaces the default command, and its last recorded
    language version (or, failing that, the JDK version found for the artifact on
    Maven Central) replaces the default JDK.

    Parameters
    ----------
    build_info: RcInternalBuildInfo
        The information extracted from the database for one PURL.
    patches: Mapping[PatchCommandBuildTool, Mapping[str, PatchValueType | None]]
        The patches applied to the build command placed in the ``command`` field.

    Returns
    -------
    str | None
        The Buildspec content, or None if validation, patching or JDK version
        normalization fails.
    """
    extra_comments = []

    purl = build_info.purl
    logger.debug(
        "Generating build spec for %s with command patches:\n%s",
        purl,
        pformat(patches),
    )

    group = purl.namespace
    artifact = purl.name
    version = purl.version
    if group is None or version is None:
        logger.error("Missing group and/or version for purl %s.", purl.to_string())
        return None

    extra_comments.append(f"Input PURL - {purl}")

    macaron_build_tool_name = _get_macaron_build_tool_name(build_info.build_tool_facts)
    if not macaron_build_tool_name:
        logger.error(
            "The PackageURL %s doesn't have any build tool that we support for generating RC buildspec. It has %s.",
            purl.to_string(),
            [(fact.build_tool_name, fact.language) for fact in build_info.build_tool_facts],
        )
        return None

    # Translate Macaron's build tool name into the one Reproducible Central expects.
    rc_build_tool_name = None
    if macaron_build_tool_name == _MacaronBuildToolName.MAVEN:
        rc_build_tool_name = _ReproducibleCentralBuildToolName.MAVEN
    elif macaron_build_tool_name == _MacaronBuildToolName.GRADLE:
        rc_build_tool_name = _ReproducibleCentralBuildToolName.GRADLE
    if not rc_build_tool_name:
        logger.critical("%s is not supported to generate RC's buildspec.", macaron_build_tool_name.value)
        return None

    # Set the default build command and jdk version.
    # The default build command depends on the build tool, while the default jdk version
    # is 8.
    final_build_command_seq = _get_default_build_command_sequence(
        macaron_build_tool_name=macaron_build_tool_name,
        patches=patches,
    )
    if not final_build_command_seq:
        logger.critical(
            "Cannot generate a default build command for %s",
            purl,
        )
        return None
    final_jdk_version = "8"
    extra_comments.append(
        f"Initial default JDK version {final_jdk_version} and default build command {final_build_command_seq}."
    )

    if build_info.generic_build_command_facts:
        # The elements are ordered in decreasing confidence score. We pick the highest one.
        build_fact = build_info.generic_build_command_facts[0]
        lookup_build_command = build_fact.command
        extra_comments.append(f"The lookup build command: {lookup_build_command}")

        patched_build_commands = patch_commands(
            cmds_sequence=[lookup_build_command],
            patches=patches,
        )
        if not patched_build_commands:
            logger.error(
                "Failed to patch look up command %s.",
                lookup_build_command,
            )
            return None

        final_build_command_seq = patched_build_commands

        lookup_jdk_vers = build_fact.language_versions
        if lookup_jdk_vers:
            # Read the last entry without popping it: the list belongs to the build
            # fact and must not be altered as a side effect of generating a spec.
            lookup_jdk_ver = lookup_jdk_vers[-1]
            extra_comments.append(f"Jdk version from lookup build command {lookup_jdk_ver}.")
            final_jdk_version = lookup_jdk_ver
        else:
            extra_comments.append("No JDK version found from lookup result.")
            # For a Maven PURL the namespace is the group id and the name is the artifact id.
            jdk_from_jar = find_jdk_version_from_central_maven_repo(
                group_id=group,
                artifact_id=artifact,
                version=version,
            )
            if jdk_from_jar:
                extra_comments.append(f"Found JDK version from jar {jdk_from_jar}.")
                final_jdk_version = jdk_from_jar
            else:
                extra_comments.append(f"No JDK version found from jar {jdk_from_jar}.")

    major_jdk_version = normalize_jdk_version(final_jdk_version)
    if not major_jdk_version:
        logger.error("Failed to obtain the major version of %s", final_jdk_version)
        return None

    template_format_values: dict[str, str] = {
        "macaron_version": importlib_metadata.version("macaron"),
        "group_id": group,
        "artifact_id": artifact,
        "version": version,
        "git_repo": build_info.repository.remote_path,
        "git_tag": build_info.repository.commit_sha,
        "tool": rc_build_tool_name.value,
        "newline": "lf",
        "buildinfo": f"target/{artifact}-{version}.buildinfo",
        "extra_comment": _get_extra_comments(extra_comments),
        # Emit the normalized version that was validated above rather than the raw one.
        # NOTE(review): assumes normalize_jdk_version returns the major version as a str - confirm.
        "jdk": major_jdk_version,
        "command": _get_build_command_sequence(final_build_command_seq),
    }

    return STRING_TEMPLATE.format_map(template_format_values)
+ """ + internal_build_info = get_rc_internal_build_info( + purl=purl, + session=session, + ) + + if not internal_build_info: + logger.error( + "Failed to obtain necessary data for purl %s from the database.", + purl, + ) + return None + + return _gen_reproducible_central_build_spec( + build_info=internal_build_info, + patches=patches, + ) diff --git a/tests/build_spec_generator/reproducible_central/compare_rc_build_spec.py b/tests/build_spec_generator/reproducible_central/compare_rc_build_spec.py new file mode 100644 index 000000000..df541d461 --- /dev/null +++ b/tests/build_spec_generator/reproducible_central/compare_rc_build_spec.py @@ -0,0 +1,165 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""This script compares 2 Reproducible Central Buildspec files.""" + +import logging +import os +import sys +from collections.abc import Callable + +CompareFn = Callable[[object, object], bool] + +logger = logging.getLogger(__name__) +logger.setLevel(logging.DEBUG) +logging.basicConfig(format="[%(filename)s:%(lineno)s %(tag)s] %(message)s") + + +def log_with_tag(tag: str) -> Callable[[str], None]: + """Generate a log function that prints the name of the file and a tag at the beginning of each line.""" + + def log_fn(msg: str) -> None: + logger.info(msg, extra={"tag": tag}) + + return log_fn + + +log_info = log_with_tag("INFO") +log_err = log_with_tag("ERROR") +log_failed = log_with_tag("FAILED") +log_passed = log_with_tag("PASSED") + + +def log_diff_str(name: str, result: str, expected: str) -> None: + """Pretty-print the diff of two Python strings.""" + output = [ + f"'{name}'", + *("---- Result ---", f"{result}"), + *("---- Expected ---", f"{expected}"), + "-----------------", + ] + log_info("\n".join(output)) + + +def skip_compare(_result: object, _expected: object) -> bool: + """Return ``True`` always. 
+ + This compare function is used when we want to skip comparing a field. + """ + return True + + +def compare_rc_build_spec( + result: dict[str, str], + expected: dict[str, str], + compare_fn_map: dict[str, CompareFn], +) -> bool: + """Compare two dictionaries obatained from 2 Reproducible Central build spec. + + Parameters + ---------- + result : dict[str, str] + The result object. + expected : dict[str, str] + The expected object. + compare_fn_map : str + A map from field name to corresponding compare function. + + Returns + ------- + bool + ``True`` if the comparison is successful, ``False`` otherwise. + """ + result_keys_only = result.keys() - expected.keys() + expected_keys_only = expected.keys() - result.keys() + + equal = True + + if len(result_keys_only) > 0: + log_err(f"Result has the following extraneous fields: {result_keys_only}") + equal = False + + if len(expected_keys_only) > 0: + log_err(f"Result does not contain these expected fields: {expected_keys_only}") + equal = False + + common_keys = set(result.keys()).intersection(set(expected.keys())) + + for key in common_keys: + if key in compare_fn_map: + equal &= compare_fn_map[key](result, expected) + continue + + if result[key] != expected[key]: + log_err(f"Mismatch found in '{key}'") + log_diff_str(key, result[key], expected[key]) + equal = False + + return equal + + +def extract_data_from_build_spec(build_spec_path: str) -> dict[str, str] | None: + """Extract data from build spec.""" + original_build_spec_content = None + try: + with open(build_spec_path, encoding="utf-8") as build_spec_file: + original_build_spec_content = build_spec_file.read() + except OSError as error: + log_err(f"Failed to read the Reproducible Central Buildspec file at {build_spec_path}. Error {error}.") + return None + + build_spec_values: dict[str, str] = {} + + # A Reproducible Central buildspec is a valid bash script. + # We use the following assumption to parse all key value mapping in a Reproducible Central buildspec. 
+ # 1. Each variable-value mapping has the form of + # = + # For example ``tool=mvn`` + # 2. If the first letter of a line is "#" we treat that line as a comment and ignore + # it. + for line in original_build_spec_content.splitlines(): + if not line or line.startswith("#"): + continue + + variable, _, value = line.partition("=") + # We allow defining a variable multiple times, where subsequent definition + # override the previous one. + build_spec_values[variable] = value + + return build_spec_values + + +def main() -> int: + """Compare a Reproducible Central Buildspec file with an expected output.""" + result_path = sys.argv[1] + expect_path = sys.argv[2] + + result_build_spec = extract_data_from_build_spec(result_path) + expect_build_spec = extract_data_from_build_spec(expect_path) + + if not expect_build_spec: + log_err(f"Failed to extract bash variables from expected Buildspec at {expect_path}.") + return os.EX_USAGE + + if not result_build_spec: + log_err(f"Failed to extract bash variables from result Buildspec at {result_build_spec}.") + return os.EX_USAGE + + equal = compare_rc_build_spec( + result=result_build_spec, + expected=expect_build_spec, + compare_fn_map={ + "buildinfo": skip_compare, + }, + ) + + if not equal: + log_failed("The result RC Buildspec does not match the RC Buildspec.") + return os.EX_DATAERR + + log_passed("The result RC Buildspec matches the RC Buildspec.") + return os.EX_OK + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tests/build_spec_generator/reproducible_central/test_reproducible_central.py b/tests/build_spec_generator/reproducible_central/test_reproducible_central.py new file mode 100644 index 000000000..6197d60c7 --- /dev/null +++ b/tests/build_spec_generator/reproducible_central/test_reproducible_central.py @@ -0,0 +1,64 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. 
@pytest.mark.parametrize(
    ("cmds_sequence", "expected"),
    [
        pytest.param(
            [
                ["make", "clean"],
                ["mvn", "clean", "package"],
            ],
            "make clean && mvn clean package",
        ),
        pytest.param(
            [
                ["mvn", "clean", "package"],
            ],
            "mvn clean package",
        ),
    ],
)
def test_get_build_command_sequence(
    cmds_sequence: list[list[str]],
    expected: str,
) -> None:
    """Check that commands are space-joined and chained with ``&&`` by _get_build_command_sequence."""
    actual = _get_build_command_sequence(cmds_sequence)
    assert actual == expected
def gen_build_spec(gen_build_spec_args: argparse.Namespace) -> int:
    """Generate a build spec file from the build information stored in a Macaron database.

    The command line arguments are validated first (database file, output format,
    PURL). The generated content is then written to ``macaron.buildspec`` inside
    the configured output directory.

    Parameters
    ----------
    gen_build_spec_args : argparse.Namespace
        The parsed arguments; ``database``, ``output_format`` and ``package_url`` are read.

    Returns
    -------
    int
        Returns os.EX_OK if successful or the corresponding error code on failure.
    """
    database_path = gen_build_spec_args.database
    if not os.path.isfile(database_path):
        logger.critical("The database file does not exist.")
        return os.EX_OSFILE

    requested_format = gen_build_spec_args.output_format
    try:
        spec_format = BuildSpecFormat(requested_format)
    except ValueError:
        logger.error("The output format %s is not supported.", requested_format)
        return os.EX_USAGE

    raw_purl = gen_build_spec_args.package_url
    try:
        parsed_purl = PackageURL.from_string(raw_purl)
    except ValueError as error:
        logger.error("Cannot parse purl %s. Error %s", raw_purl, error)
        return os.EX_USAGE

    content = gen_build_spec_str(
        purl=parsed_purl,
        database_path=database_path,
        build_spec_format=spec_format,
    )
    if not content:
        logger.error("Error while generate reproducible central build spec.")
        return os.EX_DATAERR

    logger.debug("Build spec content: \n%s", content)

    target_path = os.path.join(global_config.output_path, "macaron.buildspec")
    try:
        with open(target_path, mode="w", encoding="utf-8") as spec_file:
            # The destination is logged relative to the current working directory.
            logger.info(
                "Generating the %s format build spec to %s.",
                spec_format.value,
                os.path.relpath(target_path, os.getcwd()),
            )
            spec_file.write(content)
    except OSError as error:
        logger.error(
            "Could not generate the Buildspec to %s. Error: %s",
            os.path.relpath(target_path, os.getcwd()),
            error,
        )
        return os.EX_DATAERR

    return os.EX_OK
class GenBuildSpecStep(Step[GenBuildSpecStepOptions]):
    """An integration test step that invokes ``macaron gen-build-spec``."""

    @staticmethod
    def options_schema(cwd: str) -> cfgv.Map:  # pylint: disable=unused-argument
        """Build the cfgv schema validating the options of a gen-build-spec step.

        All three options are optional: ``main_args`` and ``command_args`` default to
        empty lists, ``database`` defaults to ``./output/macaron.db``.
        """
        main_args_option = cfgv.Optional(
            key="main_args",
            check_fn=cfgv.check_array(cfgv.check_string),
            default=[],
        )
        command_args_option = cfgv.Optional(
            key="command_args",
            check_fn=cfgv.check_array(cfgv.check_string),
            default=[],
        )
        database_option = cfgv.Optional(
            key="database",
            check_fn=cfgv.check_string,
            default="./output/macaron.db",
        )
        return cfgv.Map(
            "gen-build-spec options",
            None,
            main_args_option,
            command_args_option,
            database_option,
        )

    def cmd(self, macaron_cmd: str) -> list[str]:
        """Assemble the argument list that runs this step.

        The order is: the macaron executable, the main arguments, the
        ``gen-build-spec`` action, the database option, then the command arguments.
        """
        step_options = self.options
        return [
            macaron_cmd,
            *step_options["main_args"],
            "gen-build-spec",
            "--database",
            step_options["database"],
            *step_options["command_args"],
        ]
Signed-off-by: Trong Nhan Mai --- .../expected_macaron.buildspec | 25 +++++++ .../test.yaml | 33 ++++++++ .../cases/gen_rc_build_spec_error/test.yaml | 75 +++++++++++++++++++ .../expected_macaron.buildspec | 25 +++++++ .../test.yaml | 18 ++++- 5 files changed, 175 insertions(+), 1 deletion(-) create mode 100644 tests/integration/cases/behnazh-w_example-maven-app_gen_rc_build_spec/expected_macaron.buildspec create mode 100644 tests/integration/cases/behnazh-w_example-maven-app_gen_rc_build_spec/test.yaml create mode 100644 tests/integration/cases/gen_rc_build_spec_error/test.yaml create mode 100644 tests/integration/cases/micronaut-projects_micronaut-core/expected_macaron.buildspec diff --git a/tests/integration/cases/behnazh-w_example-maven-app_gen_rc_build_spec/expected_macaron.buildspec b/tests/integration/cases/behnazh-w_example-maven-app_gen_rc_build_spec/expected_macaron.buildspec new file mode 100644 index 000000000..b77755498 --- /dev/null +++ b/tests/integration/cases/behnazh-w_example-maven-app_gen_rc_build_spec/expected_macaron.buildspec @@ -0,0 +1,25 @@ +# Copyright (c) 2025, Oracle and/or its affiliates. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. +# Generated by Macaron version 0.15.0 + +# Input PURL - pkg:github/behnazh-w/example-maven-app@1.0 +# Initial default JDK version 8 and default build command [['mvn', '-DskipTests=true', '-Dmaven.test.skip=true', '-Dmaven.site.skip=true', '-Drat.skip=true', '-Dmaven.javadoc.skip=true', 'clean', 'package']]. +# The lookup build command: ['./mvnw', 'clean', 'package'] +# Jdk version from lookup build command 17. 
+ +groupId=behnazh-w +artifactId=example-maven-app +version=1.0 + +gitRepo=https://github.com/behnazh-w/example-maven-app + +gitTag=2deca75ed5dd365eaf1558a82347b1f11306135f + +tool=mvn +jdk=17 + +newline=lf + +command="./mvnw -DskipTests=true -Dmaven.test.skip=true -Dmaven.site.skip=true -Drat.skip=true -Dmaven.javadoc.skip=true clean package" + +buildinfo=target/example-maven-app-1.0.buildinfo diff --git a/tests/integration/cases/behnazh-w_example-maven-app_gen_rc_build_spec/test.yaml b/tests/integration/cases/behnazh-w_example-maven-app_gen_rc_build_spec/test.yaml new file mode 100644 index 000000000..161beeb08 --- /dev/null +++ b/tests/integration/cases/behnazh-w_example-maven-app_gen_rc_build_spec/test.yaml @@ -0,0 +1,33 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +description: | + Test the build spec generation on a Maven project with JDK version obtained + from the GitHub Actions workflow. + +tags: +- macaron-python-package +- macaron-docker-image +- macaron-gen-build-spec + +steps: +- name: Run macaron analyze + kind: analyze + options: + command_args: + - -purl + - pkg:github/behnazh-w/example-maven-app@1.0 +- name: Run Reproducible-central build spec generation + kind: gen-build-spec + options: + command_args: + - -purl + - pkg:github/behnazh-w/example-maven-app@1.0 + - --output-format + - rc-buildspec +- name: Compare Buildspec. + kind: compare + options: + kind: rc_build_spec + result: ./output/macaron.buildspec + expected: expected_macaron.buildspec diff --git a/tests/integration/cases/gen_rc_build_spec_error/test.yaml b/tests/integration/cases/gen_rc_build_spec_error/test.yaml new file mode 100644 index 000000000..a04148abb --- /dev/null +++ b/tests/integration/cases/gen_rc_build_spec_error/test.yaml @@ -0,0 +1,75 @@ +# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +description: | + Running macaron gen-build-spec with invalid arguments. + +tags: +- macaron-python-package +- macaron-docker-image +- macaron-gen-build-spec + +steps: +- name: Run macaron analyze on the remote repository. + kind: analyze + options: + command_args: + - --package-url + - pkg:maven/io.github.behnazh-w.demo/example-maven-app@2.0?type=jar + - -rp + - https://github.com/behnazh-w/example-maven-app +- name: Using a format that we don't support. + kind: gen-build-spec + options: + command_args: + - -purl + - pkg:maven/io.github.behnazh-w.demo/example-maven-app@2.0?type=jar + - --output-format + - this-format-is-not-supported + expect_fail: true +- name: Generate the RC Buildspec for a PURL that we haven't analyzed. + kind: gen-build-spec + options: + command_args: + - -purl + - pkg:maven/io.micronaut/micronaut-core@4.2.3 + - --output-format + - rc-buildspec + expect_fail: true +- name: Generate the RC Buildspec for a PURL that doesn't have namespace information. + kind: gen-build-spec + options: + command_args: + - -purl + - pkg:maven/io.micronaut@4.2.3 + - --output-format + - rc-buildspec + expect_fail: true +- name: Generate the RC Buildspec for a PURL that doesn't have version information. + kind: gen-build-spec + options: + command_args: + - -purl + - pkg:maven/io.micronaut/micronaut-core + - --output-format + - rc-buildspec + expect_fail: true +- name: Generate the RC Buildspec using a database that doesn't exist. + kind: gen-build-spec + options: + database: output/some_database.db + command_args: + - -purl + - pkg:maven/io.github.behnazh-w.demo/example-maven-app@2.0?type=jar + - --output-format + - rc-buildspec + expect_fail: true +- name: Generate the RC Buildspec using an invalid PURL. 
+ kind: gen-build-spec + options: + command_args: + - -purl + - invalid_purl + - --output-format + - rc-buildspec + expect_fail: true diff --git a/tests/integration/cases/micronaut-projects_micronaut-core/expected_macaron.buildspec b/tests/integration/cases/micronaut-projects_micronaut-core/expected_macaron.buildspec new file mode 100644 index 000000000..8caca83d6 --- /dev/null +++ b/tests/integration/cases/micronaut-projects_micronaut-core/expected_macaron.buildspec @@ -0,0 +1,25 @@ +# Copyright (c) 2025, Oracle and/or its affiliates. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. +# Generated by Macaron version 0.15.0 + +# Input PURL - pkg:maven/io.micronaut/micronaut-core@4.2.3 +# Initial default JDK version 8 and default build command [['./gradlew', '-x', 'test', '-Pskip.signing', '-PskipSigning', '-Pgnupg.skip', 'clean', 'assemble']]. +# The lookup build command: ['./gradlew', 'publishToSonatype', 'closeAndReleaseSonatypeStagingRepository'] +# Jdk version from lookup build command 17. + +groupId=io.micronaut +artifactId=micronaut-core +version=4.2.3 + +gitRepo=https://github.com/micronaut-projects/micronaut-core + +gitTag=36dcaf0539536dce5fc753677341609ff7f273ca + +tool=gradle +jdk=17 + +newline=lf + +command="./gradlew -x test -Pskip.signing -PskipSigning -Pgnupg.skip clean assemble" + +buildinfo=target/micronaut-core-4.2.3.buildinfo diff --git a/tests/integration/cases/micronaut-projects_micronaut-core/test.yaml b/tests/integration/cases/micronaut-projects_micronaut-core/test.yaml index 467e623bc..7855b1be2 100644 --- a/tests/integration/cases/micronaut-projects_micronaut-core/test.yaml +++ b/tests/integration/cases/micronaut-projects_micronaut-core/test.yaml @@ -1,12 +1,14 @@ -# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. 
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. description: | Analyzing the PURL when automatic dependency resolution is skipped. Run policy CLI with micronaut-core results to test deploy command information. + Also generate a build spec for this PURL and validate the build spec content. tags: - macaron-python-package +- macaron-gen-build-spec steps: - name: Run macaron analyze @@ -30,3 +32,17 @@ steps: kind: policy_report result: output/policy_report.json expected: policy_report.json +- name: Run Reproducible-central build spec generation + kind: gen-build-spec + options: + command_args: + - -purl + - pkg:maven/io.micronaut/micronaut-core@4.2.3 + - --output-format + - rc-buildspec +- name: Compare Buildspec. + kind: compare + options: + kind: rc_build_spec + result: ./output/macaron.buildspec + expected: expected_macaron.buildspec From f52c6d48433b877ef7cd263860aae03f3c4cf5d5 Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Fri, 18 Jul 2025 16:48:55 +1000 Subject: [PATCH 10/14] fix: add jdk versions 22, 23 and 24 into the list of supported jdk major versions --- src/macaron/build_spec_generator/jdk_version_normalizer.py | 3 +++ tests/build_spec_generator/test_jdk_version_normalizer.py | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/src/macaron/build_spec_generator/jdk_version_normalizer.py b/src/macaron/build_spec_generator/jdk_version_normalizer.py index 852aab9b2..7dc9f169c 100644 --- a/src/macaron/build_spec_generator/jdk_version_normalizer.py +++ b/src/macaron/build_spec_generator/jdk_version_normalizer.py @@ -21,6 +21,9 @@ "19", "20", "21", + "22", + "23", + "24", ] diff --git a/tests/build_spec_generator/test_jdk_version_normalizer.py b/tests/build_spec_generator/test_jdk_version_normalizer.py index 2236505e8..61f085c1d 100644 --- a/tests/build_spec_generator/test_jdk_version_normalizer.py +++ b/tests/build_spec_generator/test_jdk_version_normalizer.py @@ -46,6 +46,12 @@ "19",
id="major_follows_with_text", ), + # https://github.com/jboss-logging/jboss-logging/blob/25ad85c9cecf5a2f79db9a4d077221ed087e4ef5/.github/workflows/ci.yml#L46 + pytest.param( + "22-ea", + "22", + id="pkg_maven_org.jboss.logging_jboss-logging_3.6.1.Final", + ), ], ) def test_jdk_version_normalizer(version_string: str, expected: str) -> None: From 5c546f2ba78938441637a0671bb6a311a9362354 Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Fri, 18 Jul 2025 16:50:12 +1000 Subject: [PATCH 11/14] chore: add a small log message at the beginning of build spec generation --- src/macaron/__main__.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/macaron/__main__.py b/src/macaron/__main__.py index cd8bc5ce3..c79570ecd 100644 --- a/src/macaron/__main__.py +++ b/src/macaron/__main__.py @@ -266,6 +266,13 @@ def gen_build_spec(gen_build_spec_args: argparse.Namespace) -> int: logger.error("Cannot parse purl %s. Error %s", gen_build_spec_args.package_url, error) return os.EX_USAGE + logger.info( + "Generating %s buildspec for PURL %s from %s.", + output_format, + purl, + gen_build_spec_args.database, + ) + build_spec_content = gen_build_spec_str( purl=purl, database_path=gen_build_spec_args.database, From 76bdd3a8762773d7d1dc3b8136593334cb5c3380 Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Mon, 21 Jul 2025 11:23:48 +1000 Subject: [PATCH 12/14] fix: fix the sql statement for obtaining build check facts where the check result was mistakenly joined on the checkfacts.id instead of checkfacts.check_result_id --- src/macaron/build_spec_generator/macaron_db_extractor.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/macaron/build_spec_generator/macaron_db_extractor.py b/src/macaron/build_spec_generator/macaron_db_extractor.py index 05a485b7d..540422b64 100644 --- a/src/macaron/build_spec_generator/macaron_db_extractor.py +++ b/src/macaron/build_spec_generator/macaron_db_extractor.py @@ -240,7 +240,7 @@ def
get_sql_stmt_build_as_code_check(component_id: int) -> Select[tuple[BuildAsC ) .join( CheckFacts, - onclause=MappedCheckResult.id == CheckFacts.id, + onclause=MappedCheckResult.id == CheckFacts.check_result_id, ) .join( build_as_code_facts_alias, @@ -287,7 +287,7 @@ def get_sql_stmt_build_service_check(component_id: int) -> Select[tuple[BuildSer ) .join( CheckFacts, - onclause=MappedCheckResult.id == CheckFacts.id, + onclause=MappedCheckResult.id == CheckFacts.check_result_id, ) .join( build_service_facts_alias, @@ -330,11 +330,11 @@ def get_sql_stmt_build_script_check(component_id: int) -> Select[tuple[BuildScri .select_from(Component) .join( MappedCheckResult, - onclause=Component.id == MappedCheckResult.component_id, + onclause=MappedCheckResult.component_id == Component.id, ) .join( CheckFacts, - onclause=MappedCheckResult.id == CheckFacts.id, + onclause=MappedCheckResult.id == CheckFacts.check_result_id, ) .join( build_script_facts_alias, From 63d610f405693a18015b8d40c87d0b1cb4bcb6b7 Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Mon, 21 Jul 2025 14:52:38 +1000 Subject: [PATCH 13/14] chore: move the looking up of repository before the build tool lookup because without repository no build tool is found This commit also adds some useful debug messages for extracting values from the database for Reproducible Central buildspec generation.
--- .../reproducible_central/rc_build_info.py | 57 +++++++++++++------ 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/src/macaron/build_spec_generator/reproducible_central/rc_build_info.py b/src/macaron/build_spec_generator/reproducible_central/rc_build_info.py index 52287df0b..105f10e45 100644 --- a/src/macaron/build_spec_generator/reproducible_central/rc_build_info.py +++ b/src/macaron/build_spec_generator/reproducible_central/rc_build_info.py @@ -4,6 +4,7 @@ """This module contains the representation of information needed for Reproducible Central Buildspec generation.""" import logging +import pprint from collections.abc import Sequence from dataclasses import dataclass @@ -38,6 +39,12 @@ class RcInternalBuildInfo: build_tool_facts: Sequence[BuildToolFacts] +def format_build_command_infos(build_command_infos: list[GenericBuildCommandInfo]) -> str: + """Return the prettified str format for a list of `GenericBuildCommandInfo` instances.""" + pretty_formatted_ouput = [pprint.pformat(build_command_info) for build_command_info in build_command_infos] + return "\n".join(pretty_formatted_ouput) + + def get_rc_internal_build_info( purl: PackageURL, session: Session, @@ -78,43 +85,53 @@ def get_rc_internal_build_info( logger.debug("Latest component ID: %d", latest_component_id) try: - build_tool_facts = lookup_build_tools_check( - component_id=latest_component_id, - session=session, - ) - except QueryMacaronDatabaseError as lookup_build_tools_error: + lookup_component_repository = lookup_repository(latest_component_id, session) + except QueryMacaronDatabaseError as lookup_repository_error: logger.error( - "Unexpected result from querying build tools for %s. Error: %s", + "Unexpected result from querying repository information for %s. 
Error: %s", purl.to_string(), - lookup_build_tools_error, + lookup_repository_error, ) return None - if not build_tool_facts: + if not lookup_component_repository: logger.error( - "Cannot find any build tool for PackageURL %s in the database.", + "Cannot find any repository information for %s in the database.", purl.to_string(), ) return None - logger.debug("Build tools discovered from the %s table: %s", BuildToolFacts.__tablename__, build_tool_facts) + logger.info( + "Repository information for purl %s: url %s, commit %s", + purl, + lookup_component_repository.remote_path, + lookup_component_repository.commit_sha, + ) try: - lookup_component_repository = lookup_repository(latest_component_id, session) - except QueryMacaronDatabaseError as lookup_repository_error: + build_tool_facts = lookup_build_tools_check( + component_id=latest_component_id, + session=session, + ) + except QueryMacaronDatabaseError as lookup_build_tools_error: logger.error( - "Unexpected result from querying repository information for %s. Error: %s", + "Unexpected result from querying build tools for %s. Error: %s", purl.to_string(), - lookup_repository_error, + lookup_build_tools_error, ) return None - if not lookup_component_repository: + if not build_tool_facts: logger.error( - "Cannot find any repository information for %s in the database.", + "Cannot find any build tool for PackageURL %s in the database.", purl.to_string(), ) return None + logger.info( + "Build tools discovered from the %s table: %s", + BuildToolFacts.__tablename__, + [fact.build_tool_name for fact in build_tool_facts], + ) try: - lookup_build_facts = lookup_any_build_command(latest_component_id, session) + lookup_build_command_infos = lookup_any_build_command(latest_component_id, session) except QueryMacaronDatabaseError as lookup_build_command_error: logger.error( "Unexpected result from querying all build command information for %s. 
Error: %s", @@ -122,11 +139,15 @@ def get_rc_internal_build_info( lookup_build_command_error, ) return None + logger.debug( + "Build command information discovered\n%s", + format_build_command_infos(lookup_build_command_infos), + ) return RcInternalBuildInfo( purl=purl, repository=lookup_component_repository, latest_component_id=latest_component_id, build_tool_facts=build_tool_facts, - generic_build_command_facts=lookup_build_facts, + generic_build_command_facts=lookup_build_command_infos, ) From 9b1d5228341948e3b2c41f9d5a104c5e101b2eac Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Mon, 21 Jul 2025 17:07:32 +1000 Subject: [PATCH 14/14] chore: support gen-build-spec for the Docker image --- scripts/release_scripts/run_macaron.sh | 29 ++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/scripts/release_scripts/run_macaron.sh b/scripts/release_scripts/run_macaron.sh index 54ca13fd1..290cbd39f 100755 --- a/scripts/release_scripts/run_macaron.sh +++ b/scripts/release_scripts/run_macaron.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. # This script runs the Macaron Docker image. @@ -279,7 +279,7 @@ while [[ $# -gt 0 ]]; do entrypoint+=("macaron") ;; # Parsing commands for macaron entrypoint. 
- analyze|dump-defaults|verify-policy) + analyze|dump-defaults|verify-policy|gen-build-spec) command=$1 shift break @@ -355,6 +355,19 @@ elif [[ $command == "verify-policy" ]]; then esac shift done +elif [[ $command == "gen-build-spec" ]]; then + while [[ $# -gt 0 ]]; do + case $1 in + -d|--database) + gen_build_spec_arg_database="$2" + shift + ;; + *) + rest_command+=("$1") + ;; + esac + shift + done elif [[ $command == "dump-defaults" ]]; then while [[ $# -gt 0 ]]; do case $1 in @@ -531,6 +544,18 @@ if [[ -n "${arg_datalog_policy_file:-}" ]]; then mount_file "-f/--file" "$datalog_policy_file" "$datalog_policy_file_in_container" "ro,Z" fi +# MACARON entrypoint - gen-build-spec command argvs +# This is for macaron gen-build-spec command. +# Determine the database path to be mounted into ${MACARON_WORKSPACE}/database/. +if [[ -n "${gen_build_spec_arg_database:-}" ]]; then + gen_build_spec_database_path="${gen_build_spec_arg_database}" + file_name="$(basename "${gen_build_spec_database_path}")" + gen_build_spec_database_path_in_container="${MACARON_WORKSPACE}/database/${file_name}" + + argv_command+=("--database" "$gen_build_spec_database_path_in_container") + mount_file "-d/--database" "$gen_build_spec_database_path" "$gen_build_spec_database_path_in_container" "rw,Z" +fi + # Determine that ~/.gradle/gradle.properties exists to be mounted into ${MACARON_WORKSPACE}/gradle.properties if [[ -f "$HOME/.gradle/gradle.properties" ]]; then mounts+=("-v" "$HOME/.gradle/gradle.properties":"${MACARON_WORKSPACE}/gradle.properties:ro,Z")