Skip to content

Commit 277ecfb

Browse files
authored
chore: update gitlab repo remote url (#354)
Signed-off-by: Trong Nhan Mai <trong.nhan.mai@oracle.com>
1 parent b0c6e59 commit 277ecfb

File tree

8 files changed

+294
-11
lines changed

8 files changed

+294
-11
lines changed

src/macaron/errors.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,3 +30,7 @@ class ConfigurationError(MacaronError):
3030

3131
class CloneError(MacaronError):
    """Raised when a git repository cannot be cloned."""
33+
34+
35+
class RepoCheckOutError(MacaronError):
    """Raised when the requested revision of a git repository cannot be checked out."""

src/macaron/slsa_analyzer/analyzer.py

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
NoneDependencyAnalyzer,
2828
)
2929
from macaron.dependency_analyzer.cyclonedx import get_deps_from_sbom
30-
from macaron.errors import CloneError
30+
from macaron.errors import CloneError, RepoCheckOutError
3131
from macaron.output_reporter.reporter import FileReporter
3232
from macaron.output_reporter.results import Record, Report, SCMStatus
3333
from macaron.slsa_analyzer import git_url
@@ -477,7 +477,7 @@ def _prepare_repo(
477477
repo_path: str,
478478
branch_name: str = "",
479479
digest: str = "",
480-
) -> Git:
480+
) -> Git | None:
481481
"""Prepare the target repository for analysis.
482482
483483
If ``repo_path`` is a remote path, the target repo is cloned to ``{target_dir}/{unique_path}``.
@@ -501,9 +501,10 @@ def _prepare_repo(
501501
502502
Returns
503503
-------
504-
Git
504+
Git | None
505505
The pydriller.Git object of the repository or None if error.
506506
"""
507+
# TODO: separate the logic for handling remote and local repos instead of putting them into this method.
507508
# Cannot specify a commit hash without specifying the branch.
508509
if not branch_name and digest:
509510
logger.error(
@@ -560,8 +561,31 @@ def _prepare_repo(
560561
logger.error("Cannot reset the target repository.")
561562
return None
562563

563-
if not git_url.check_out_repo_target(git_obj, branch_name, digest, (not is_remote)):
564-
logger.error("Cannot checkout the specific branch or commit of the target repo.")
564+
# Check out the specific branch or commit. This operation varies depending on the git service that the
565+
# repository uses.
566+
if not is_remote:
567+
# If the repo path provided by the user is a local path, we need to get the actual origin remote URL of
568+
# the repo to decide on the suitable git service.
569+
origin_remote_url = git_url.get_remote_origin_of_local_repo(git_obj)
570+
if git_url.is_remote_repo(origin_remote_url):
571+
# The local repo's origin remote url is a remote URL (e.g https://host.com/a/b): In this case, we obtain
572+
# the corresponding git service using ``self.get_git_service``.
573+
git_service = self.get_git_service(origin_remote_url)
574+
else:
575+
# The local repo's origin remote url is a local path (e.g /path/to/local/...). This happens when the
576+
# target repository is a clone from another local repo or is a clone from a git archive -
577+
# https://git-scm.com/docs/git-archive: In this case, we fall-back to the generic function
578+
# ``git_url.check_out_repo_target``.
579+
if not git_url.check_out_repo_target(git_obj, branch_name, digest, not is_remote):
580+
logger.error("Cannot checkout the specific branch or commit of the target repo.")
581+
return None
582+
583+
return git_obj
584+
585+
try:
586+
git_service.check_out_repo(git_obj, branch_name, digest, not is_remote)
587+
except RepoCheckOutError as error:
588+
logger.error(error)
565589
return None
566590

567591
return git_obj

src/macaron/slsa_analyzer/git_service/base_git_service.py

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,10 @@
55

66
from abc import abstractmethod
77

8+
from pydriller.git import Git
9+
810
from macaron.config.defaults import defaults
9-
from macaron.errors import CloneError, ConfigurationError
11+
from macaron.errors import CloneError, ConfigurationError, RepoCheckOutError
1012
from macaron.slsa_analyzer import git_url
1113

1214

@@ -27,7 +29,6 @@ def __init__(self, name: str) -> None:
2729
@abstractmethod
2830
def load_defaults(self) -> None:
2931
"""Load the values for this git service from the ini configuration."""
30-
raise NotImplementedError
3132

3233
def load_domain(self, section_name: str) -> str | None:
3334
"""Load the domain of the git service from the ini configuration section ``section_name``.
@@ -110,7 +111,32 @@ def clone_repo(self, clone_dir: str, url: str) -> None:
110111
CloneError
111112
If there is an error cloning the repo.
112113
"""
113-
raise NotImplementedError()
114+
115+
@abstractmethod
def check_out_repo(self, git_obj: Git, branch: str, digest: str, offline_mode: bool) -> Git:
    """Check out the user-specified branch and commit of a repository.

    Each concrete git service provides its own implementation of this operation.

    Parameters
    ----------
    git_obj : Git
        The Git object of the repository to check out.
    branch : str
        The name of the branch to check out.
    digest : str
        The SHA of the commit to check out.
    offline_mode : bool
        If True, no fetching is performed.

    Returns
    -------
    Git
        The same Git object that was passed in.

    Raises
    ------
    RepoCheckOutError
        If there is an error while checking out the specific branch or commit.
    """
114140

115141

116142
class NoneGitService(BaseGitService):
@@ -154,3 +180,19 @@ def clone_repo(self, _clone_dir: str, url: str) -> None:
154180
Always raise, since this method should not be used to clone any repository.
155181
"""
156182
raise CloneError(f"Internal error encountered when cloning the repo '{url}'.")
183+
184+
def check_out_repo(self, git_obj: Git, branch: str, digest: str, offline_mode: bool) -> Git:
    """Refuse to check out a branch or commit of a repository.

    This class represents a ``None`` git service, so there is nothing to check out from:
    the method never returns and always raises ``RepoCheckOutError``.

    Parameters
    ----------
    git_obj : Git
        The Git object of the repository; only used to name the repository in the error message.
    branch : str
        The requested branch; only used in the error message.
    digest : str
        The requested commit SHA; only used in the error message.
    offline_mode : bool
        Unused.

    Raises
    ------
    RepoCheckOutError
        Always raised, since this method should not be used to check out any repository.
    """
    reason = (
        f"Cannot check out branch {branch} and commit {digest} for repo {git_obj.project_name} "
        "from an empty git service"
    )
    raise RepoCheckOutError(reason)

src/macaron/slsa_analyzer/git_service/bitbucket.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@
55

66
import logging
77

8+
from pydriller.git import Git
9+
10+
from macaron.errors import RepoCheckOutError
811
from macaron.slsa_analyzer.git_service.base_git_service import BaseGitService
912

1013
logger: logging.Logger = logging.getLogger(__name__)
@@ -26,3 +29,7 @@ def clone_repo(self, _clone_dir: str, _url: str) -> None:
2629
"""Clone a BitBucket repo."""
2730
# TODO: implement this once support for BitBucket is added.
2831
logger.info("Cloning BitBucket repositories is not supported yet. Please clone the repository manually.")
32+
33+
def check_out_repo(self, git_obj: Git, branch: str, digest: str, offline_mode: bool) -> Git:
    """Reject the check-out request for a Bitbucket repository.

    None of the arguments are used: the method always raises.

    Raises
    ------
    RepoCheckOutError
        Always raised, with a message saying that checking out on Bitbucket is not supported yet.
    """
    unsupported_msg = "Checking out a branch or commit on a Bitbucket repository is not supported yet."
    raise RepoCheckOutError(unsupported_msg)

src/macaron/slsa_analyzer/git_service/github.py

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,10 @@
33

44
"""This module contains the spec for the GitHub service."""
55

6+
from pydriller.git import Git
7+
68
from macaron.config.global_config import global_config
7-
from macaron.errors import ConfigurationError
9+
from macaron.errors import ConfigurationError, RepoCheckOutError
810
from macaron.slsa_analyzer import git_url
911
from macaron.slsa_analyzer.git_service.api_client import GhAPIClient, get_default_gh_client
1012
from macaron.slsa_analyzer.git_service.base_git_service import BaseGitService
@@ -56,3 +58,34 @@ def clone_repo(self, clone_dir: str, url: str) -> None:
5658
If there is an error cloning the repo.
5759
"""
5860
git_url.clone_remote_repo(clone_dir, url)
61+
62+
def check_out_repo(self, git_obj: Git, branch: str, digest: str, offline_mode: bool) -> Git:
    """Check out the user-specified branch and commit of a GitHub repository.

    The work is delegated to the generic ``git_url.check_out_repo_target`` helper; a falsy
    result from that helper is converted into a ``RepoCheckOutError``.

    Parameters
    ----------
    git_obj : Git
        The Git object of the repository to check out.
    branch : str
        The name of the branch to check out.
    digest : str
        The SHA of the commit to check out.
    offline_mode : bool
        If True, no fetching is performed.

    Returns
    -------
    Git
        The same Git object that was passed in.

    Raises
    ------
    RepoCheckOutError
        If the specific branch and commit cannot be checked out.
    """
    checked_out = git_url.check_out_repo_target(git_obj, branch, digest, offline_mode)
    if checked_out:
        return git_obj

    raise RepoCheckOutError(
        f"Failed to check out branch {branch} and commit {digest} for repo {git_obj.project_name}."
    )

src/macaron/slsa_analyzer/git_service/gitlab.py

Lines changed: 79 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,9 @@
2323
from abc import abstractmethod
2424
from urllib.parse import ParseResult, urlunparse
2525

26-
from macaron.errors import CloneError, ConfigurationError
26+
from pydriller.git import Git
27+
28+
from macaron.errors import CloneError, ConfigurationError, RepoCheckOutError
2729
from macaron.slsa_analyzer import git_url
2830
from macaron.slsa_analyzer.git_service.base_git_service import BaseGitService
2931

@@ -103,6 +105,10 @@ def clone_repo(self, clone_dir: str, url: str) -> None:
103105
To clone a GitLab repository with access token, we embed the access token in the https URL.
104106
See GitLab documentation: https://docs.gitlab.com/ee/gitlab-basics/start-using-git.html#clone-using-a-token.
105107
108+
If we clone using the https URL with the token embedded, this URL will be stored as plain text in .git/config as
109+
the origin remote URL. Therefore, after a repository is cloned, this remote origin URL will be set
110+
with the value of the original ``url`` (which does not have the embedded token).
111+
106112
Parameters
107113
----------
108114
clone_dir: str
@@ -117,7 +123,78 @@ def clone_repo(self, clone_dir: str, url: str) -> None:
117123
If there is an error cloning the repository.
118124
"""
119125
clone_url = self.construct_clone_url(url)
120-
git_url.clone_remote_repo(clone_dir, clone_url)
126+
# In the ``git_url.clone_remote_repo`` function, CloneError exception is raised whenever the repository
127+
# has not been cloned or the clone attempts failed.
128+
# In both cases, the repository would not be available on the file system to contain the token-included URL.
129+
# Therefore, we don't need to catch and handle the CloneError exceptions here.
130+
repo = git_url.clone_remote_repo(clone_dir, clone_url)
131+
132+
# If ``git_url.clone_remote_repo`` returns a Repo instance, this means that the repository is freshly cloned
133+
# with the token embedded URL. We will set its value back to the original non-token URL.
134+
# If ``git_url.clone_remote_repo`` returns None, it means that the repository already exists so we don't need
135+
# to do anything.
136+
if repo:
137+
try:
138+
origin_remote = repo.remote("origin")
139+
except ValueError as error:
140+
raise CloneError("Cannot find the remote origin for this repository.") from error
141+
142+
origin_remote.set_url(url)
143+
144+
def check_out_repo(self, git_obj: Git, branch: str, digest: str, offline_mode: bool) -> Git:
    """Check out the user-specified branch and commit of a GitLab repository.

    Before the checkout, the origin remote URL of the target repository is replaced by the URL
    returned from ``self.construct_clone_url`` (the token-embedded URL, if a token is available).

    After the checkout, the origin remote URL is set back to its original value so that no
    token-embedded URL remains in the repository configuration. The restoration is performed
    in a ``finally`` clause, so it also happens when the checkout operation raises.

    Parameters
    ----------
    git_obj : Git
        The Git object of the repository to check out.
    branch : str
        The name of the branch to check out.
    digest : str
        The SHA of the commit to check out.
    offline_mode : bool
        If True, no fetching is performed.

    Returns
    -------
    Git
        The same Git object that was passed in.

    Raises
    ------
    RepoCheckOutError
        If the origin remote cannot be found, if the origin remote URL cannot be turned into a
        clone URL, or if the specific branch and commit cannot be checked out.
    """
    remote_origin_url = git_url.get_remote_origin_of_local_repo(git_obj)

    try:
        origin_remote = git_obj.repo.remote("origin")
    except ValueError as error:
        raise RepoCheckOutError("Cannot find the remote origin for this repository.") from error

    try:
        reconstructed_url = self.construct_clone_url(remote_origin_url)
    except CloneError as error:
        raise RepoCheckOutError("Cannot parse the remote origin URL of this repository.") from error

    # Temporarily point origin at the reconstructed (possibly token-embedded) URL.
    origin_remote.set_url(reconstructed_url, remote_origin_url)
    try:
        check_out_status = git_url.check_out_repo_target(git_obj, branch, digest, offline_mode)
    finally:
        # Always restore the original URL, even if the checkout raised, so that the
        # token-embedded URL is never left behind as the origin remote URL.
        origin_remote.set_url(remote_origin_url, reconstructed_url)

    if not check_out_status:
        raise RepoCheckOutError(
            f"Failed to check out branch {branch} and commit {digest} for repo {git_obj.project_name}."
        )

    return git_obj
121198

122199

123200
class SelfHostedGitLab(GitLab):
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
This is a test file.

0 commit comments

Comments
 (0)