Skip to content

Commit 7a8809d

Browse files
Initial version of a git_bdiff module (#100)
* Initial version of a git_bdiff module A simple module that wraps around git diff and generates a complete list of files changed on a branch since it diverged from the parent. This is intended to replace the functionality of fcm_bdiff. * Add the ability to specify a repository directory Allow the constructor to specify the path to a repository and update the run_git method to change to the directory before running each git command. * Include review changes from @ericaneininger * Include review changes from @r-sharp * Improve tests of error handling Add string comparisons to existing exception tests and add a couple of exception handling tests for the run_git method to improve testing coverage. * Improve hash pattern match Reviewer has recommended changing the pattern match to require a full 40 character hex string. * Minor change to remove some dead test code * Update branch pattern somewhat This improves the pattern used to match the branch to make it closer to the official git definition. The pattern does not need to include all the rules defined by git check-ref-format because the branch name must already be compliant with these in order to be created in the first place. --------- Co-authored-by: Erica Neininger <107684099+ericaneininger@users.noreply.github.com>
1 parent 30cc67a commit 7a8809d

File tree

2 files changed

+369
-0
lines changed

2 files changed

+369
-0
lines changed

bdiff/git_bdiff.py

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
#!/usr/bin/env python3
2+
# *********************************COPYRIGHT************************************
3+
# (C) Crown copyright Met Office. All rights reserved.
4+
# For further details please refer to the file COPYRIGHT.txt
5+
# which you should have received as part of this distribution.
6+
# *********************************COPYRIGHT************************************
7+
"""
8+
Module to obtain a list of all altered files on a git branch from
9+
the point where it diverged from the parent branch to the most recent
10+
commit.
11+
12+
Usage is as follows:
13+
14+
>>> bdiff = GitBDiff()
15+
16+
And then:
17+
18+
>>> for change in bdiff.files():
19+
... print(change)
20+
"""
21+
22+
import re
23+
import subprocess
24+
from pathlib import Path
25+
26+
27+
class GitBDiffError(Exception):
    """Common base class for the errors raised by this module."""
29+
30+
31+
class GitBDiffNotGit(GitBDiffError):
    """Raised when the target is not part of a git repository."""

    def __init__(self, cmd):
        """Build the error message from the git command that was run.

        Each element of cmd is converted to a string and the results
        are joined with single spaces inside the message.
        """
        command_line = " ".join(map(str, cmd))
        super().__init__(f"not a repository (cmd:{command_line})")
38+
39+
40+
class GitBDiff:
    """Class which generates a branch diff.

    On construction the instance runs git to find the branch point
    with the parent, the latest commit and the current branch name.
    These are stored as the ancestor, current and branch attributes.

    Arguments:
        parent: name of the parent branch. Falls back to
            primary_branch when not given (or otherwise falsy).
        repo: optional path to a directory from which the git
            commands are run. When None, commands run in the current
            working directory.

    Raises:
        GitBDiffError: if repo is given but is not a directory, or if
            any of the git queries fail.
    """

    # Name of primary branch - default is main
    primary_branch = "main"

    # Match hex commit IDs
    _hash_pattern = re.compile(r"^\s*([0-9a-f]{40})\s*$")

    # Match branch names. This should catch all valid names but may
    # also let some invalid names through. This should not matter
    # given that it is being used to match git command output. The
    # final character is matched separately so that a name cannot end
    # in a dot, while still accepting single character names and
    # keeping trailing whitespace (e.g. a carriage return) out of the
    # captured name. For a complete overview of the naming scheme,
    # see man git check-ref-format
    _branch_pattern = re.compile(
        r"^\s*([^\s~\^\:\?\*\[]*[^\s~\^\:\?\*\[.])\s*$"
    )

    def __init__(self, parent=None, repo=None):
        self.parent = parent or self.primary_branch

        if repo is None:
            self._repo = None
        else:
            self._repo = Path(repo)
            if not self._repo.is_dir():
                raise GitBDiffError(f"{repo} is not a directory")

        self.ancestor = self.get_branch_point()
        self.current = self.get_latest_commit()
        self.branch = self.get_branch_name()

    def _first_match(self, args, pattern, error):
        """Return group 1 of the first git output line matching pattern.

        Runs git with args and scans the output line by line. Raises
        GitBDiffError with the message in error if no line matches.
        """

        for line in self.run_git(args):
            if m := pattern.match(line):
                return m.group(1)
        raise GitBDiffError(error)

    def get_branch_point(self):
        """Get the branch point from the parent repo.

        Find the commit which marks the point of divergence from the
        parent repository. If there are no changes or this is the
        trunk, the branch point will be the same as the most recent
        commit.

        Returns the commit hash reported by git merge-base and raises
        GitBDiffError if no hash is found in the output.
        """

        return self._first_match(
            ["merge-base", self.parent, "HEAD"],
            self._hash_pattern,
            "branch point not found",
        )

    def get_latest_commit(self):
        """Get the last commit ID on the branch.

        Raises GitBDiffError if no commit hash is found in the output
        of git show.
        """

        return self._first_match(
            ["show", "--pretty=%H", "--no-patch"],
            self._hash_pattern,
            "current revision not found",
        )

    def get_branch_name(self):
        """Get the name of the current branch.

        Raises GitBDiffError if git branch --show-current does not
        output a line that looks like a branch name.
        """

        return self._first_match(
            ["branch", "--show-current"],
            self._branch_pattern,
            "unable to get branch name",
        )

    @property
    def is_branch(self):
        """Whether this is a branch or main."""
        return self.branch != self.primary_branch

    @property
    def has_diverged(self):
        """Whether the branch has diverged from its parent."""
        return self.ancestor != self.current

    def files(self):
        """Iterate over files changed on the branch.

        Yields each non-empty line output by git diff --name-only
        with the filter AMX, taken against the branch point.
        """

        # NOTE(review): only the ancestor revision is passed, so git
        # presumably compares it with the working tree rather than
        # with HEAD - confirm that including uncommitted edits is
        # intended.
        for line in self.run_git(
            ["diff", "--name-only", "--diff-filter=AMX", self.ancestor]
        ):
            if line != "":
                yield line

    def run_git(self, args):
        """Run a git command and yield the output.

        args must be a list of arguments which is appended to "git".
        Because this is a generator, nothing runs (and no error is
        raised) until the result is first iterated.

        Yields the standard output of the command, one line at a
        time, split on newlines. This includes a final empty string
        when the output ends with a newline.

        Raises:
            TypeError: if args is not a list.
            GitBDiffNotGit: if git reports that it is not running in
                a repository.
            GitBDiffError: if git reports any other fatal error or
                returns a non-zero exit status.
        """

        if not isinstance(args, list):
            raise TypeError("args must be a list")
        cmd = ["git"] + args

        # Run the command in the repo directory, capture the output,
        # and check for errors. The built-in error check is not used
        # to allow specific git errors to be treated more precisely
        proc = subprocess.run(
            cmd, capture_output=True, check=False, shell=False, cwd=self._repo
        )

        for line in proc.stderr.decode("utf-8").split("\n"):
            if line.startswith("fatal: not a git repository"):
                raise GitBDiffNotGit(cmd)
            if line.startswith("fatal: "):
                # Drop the "fatal: " prefix from the message
                raise GitBDiffError(line[7:])

        if proc.returncode != 0:
            raise GitBDiffError(f"command returned {proc.returncode}")

        yield from proc.stdout.decode("utf-8").split("\n")

bdiff/tests/test_git_bdiff.py

Lines changed: 216 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,216 @@
1+
#!/usr/bin/env python3
2+
# *********************************COPYRIGHT************************************
3+
# (C) Crown copyright Met Office. All rights reserved.
4+
# For further details please refer to the file COPYRIGHT.txt
5+
# which you should have received as part of this distribution.
6+
# *********************************COPYRIGHT************************************
7+
"""
8+
Test suite for git_bdiff module.
9+
"""
10+
11+
import os
12+
import subprocess
13+
import pytest
14+
15+
from git_bdiff import GitBDiff, GitBDiffError, GitBDiffNotGit
16+
17+
18+
# Disable warnings caused by the use of pytest fixtures
19+
# pylint: disable=redefined-outer-name
20+
21+
22+
def add_to_repo(start, end, message, mode="wt"):
    """Create dummy files in the working directory and commit them.

    One file named file<i> is written for each i in range(start, end)
    using the given open mode, then everything is staged and
    committed with the supplied message.
    """

    for index in range(start, end):
        with open(f"file{index}", mode, encoding="utf-8") as handle:
            handle.write(f"Lorem ipsum dolor sit amet {index}\n")

    git_commands = (
        ["git", "add", "-A"],
        ["git", "commit", "--no-gpg-sign", "-m", message],
    )
    for command in git_commands:
        subprocess.run(command, check=True)
31+
32+
33+
@pytest.fixture(scope="session")
def git_repo(tmpdir_factory):
    """Create and populate a test git repo.

    The repository is created in a fresh temporary directory, which
    also becomes the working directory. It holds a main branch plus
    the branches mybranch, subbranch (created from mybranch),
    unchanged and overwrite (both created from main). The main branch
    is checked out on return.

    Returns the location of the repository.
    """

    location = tmpdir_factory.mktemp("data")
    os.chdir(location)

    # Create the repo. The initial branch name depends on the local
    # git configuration, so point HEAD at main explicitly before the
    # first commit to make the later checkouts of main reliable.
    subprocess.run(["git", "init"], check=True)
    subprocess.run(
        ["git", "symbolic-ref", "HEAD", "refs/heads/main"], check=True
    )

    # Commits need an identity, which may not be configured on the
    # machine running the tests. Set one for this repository only.
    subprocess.run(["git", "config", "user.name", "Test User"], check=True)
    subprocess.run(
        ["git", "config", "user.email", "test@example.com"], check=True
    )

    # Add some files
    add_to_repo(0, 10, "Testing")

    # Create a branch and add some files
    subprocess.run(["git", "checkout", "-b", "mybranch"], check=True)
    add_to_repo(20, 30, "Commit to mybranch")

    # Create a branch-of-branch and add more files
    subprocess.run(["git", "checkout", "-b", "subbranch"], check=True)
    add_to_repo(40, 50, "Commit to subbranch")

    # Create a branch from main without any changes
    subprocess.run(["git", "checkout", "main"], check=True)
    subprocess.run(["git", "checkout", "-b", "unchanged"], check=True)

    # Create a branch from main and overwrite some things
    subprocess.run(["git", "checkout", "main"], check=True)
    subprocess.run(["git", "checkout", "-b", "overwrite"], check=True)
    add_to_repo(0, 10, "Overwriting", "at")

    # Switch back to the main branch ready for testing
    subprocess.run(["git", "checkout", "main"], check=True)

    return location
65+
66+
67+
def test_init(git_repo):
    """Check a default GitBDiff created while main is checked out."""

    os.chdir(git_repo)
    diff = GitBDiff()

    assert diff.branch is not None
    assert diff.branch == "main"
    assert not diff.is_branch
    assert not diff.has_diverged
77+
78+
79+
def test_repo_selection(git_repo):
    """Check that the repository can be chosen with the repo argument."""

    # Run from outside the repository so only repo= can locate it
    os.chdir("/")
    diff = GitBDiff(repo=git_repo)

    assert diff.branch is not None
    assert diff.branch == "main"
    assert not diff.is_branch
    assert not diff.has_diverged
89+
90+
91+
def test_invalid_repo_selection(git_repo):
    """Check that a missing path or a plain file is rejected as a repo."""

    for bad_path in ("/nosuch", "/etc/hosts"):
        with pytest.raises(GitBDiffError):
            GitBDiff(repo=bad_path)
99+
100+
101+
def test_branch_diff(git_repo):
    """Check the diff of a branch created directly from main."""

    os.chdir(git_repo)
    subprocess.run(["git", "checkout", "mybranch"], check=True)

    try:
        diff = GitBDiff()
        changed = list(diff.files())
    finally:
        # Always return to main so later tests start from there
        subprocess.run(["git", "checkout", "main"], check=True)

    assert diff.branch == "mybranch"
    assert diff.is_branch
    assert diff.has_diverged
    assert len(changed) == 10
    assert changed[0] == "file20"
118+
119+
120+
def test_branch_of_branch_diff(git_repo):
    """Check the diff of a branch created from another branch.

    The result should contain every change made since the branch
    point with main, including those committed on the intermediate
    branch.
    """

    os.chdir(git_repo)
    subprocess.run(["git", "checkout", "subbranch"], check=True)

    try:
        diff = GitBDiff()
        changed = list(diff.files())
    finally:
        # Always return to main so later tests start from there
        subprocess.run(["git", "checkout", "main"], check=True)

    assert diff.branch == "subbranch"
    assert diff.is_branch
    assert diff.has_diverged
    assert len(changed) == 20
    assert changed[0] == "file20"
    assert changed[-1] == "file49"
142+
143+
144+
def test_overwritten_branch(git_repo):
    """Check the diff of a branch that modifies existing files."""

    os.chdir(git_repo)
    subprocess.run(["git", "checkout", "overwrite"], check=True)

    try:
        diff = GitBDiff()
        changed = list(diff.files())
    finally:
        # Always return to main so later tests start from there
        subprocess.run(["git", "checkout", "main"], check=True)

    assert diff.branch == "overwrite"
    assert diff.is_branch
    assert diff.has_diverged
    assert len(changed) == 10
159+
160+
161+
def test_unchanged_branch(git_repo):
    """Check a branch that has no commits of its own."""

    os.chdir(git_repo)
    subprocess.run(["git", "checkout", "unchanged"], check=True)

    try:
        diff = GitBDiff()
        changed = list(diff.files())
    finally:
        # Always return to main so later tests start from there
        subprocess.run(["git", "checkout", "main"], check=True)

    assert diff.branch == "unchanged"
    assert diff.is_branch
    assert not diff.has_diverged
    assert not changed
177+
178+
179+
def test_non_repo(tmpdir):
    """Check the error raised when the working directory is not a repo."""

    os.chdir(tmpdir)

    with pytest.raises(GitBDiffNotGit) as caught:
        GitBDiff()

    error_text = str(caught.value)
    assert "not a repository" in error_text
187+
188+
189+
def test_nonexistent_parent(git_repo):
    """Check the error raised when the parent branch does not exist.

    This stands in for the detection of git errors in general.
    """

    os.chdir(git_repo)

    with pytest.raises(GitBDiffError) as caught:
        GitBDiff(parent="nosuch")

    error_text = str(caught.value)
    assert "Not a valid object name nosuch" in error_text
201+
202+
203+
def test_git_run(git_repo):
    """Test git interface and error handling."""

    # Enter the repository explicitly rather than relying on the
    # working directory left behind by whichever test ran previously
    os.chdir(git_repo)
    bdiff = GitBDiff()

    with pytest.raises(TypeError) as exc:
        # Use a string in place of a list. run_git is a generator, so
        # it has to be consumed before the error is raised
        list(bdiff.run_git("commit -m ''"))
    assert "args must be a list" in str(exc.value)

    with pytest.raises(GitBDiffError) as exc:
        # Run a command that should return non-zero
        list(bdiff.run_git(["commit", "-m", "''"]))
    assert "command returned 1" in str(exc.value)

0 commit comments

Comments
 (0)