Skip to content

Fix PR cycle time calculation for draft PRs #636

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 9 commits into from
10 changes: 10 additions & 0 deletions backend/analytics_server/mhq/exapi/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,16 @@ def get_pr_commits(self, pr: GithubPullRequest):
def get_pr_reviews(self, pr: GithubPullRequest) -> GithubPaginatedList:
return pr.get_reviews()

def get_pr_timeline(
self, org_login: str, repo_name: str, pr_number: int
) -> List[Dict]:
github_url = (
f"{self.base_url}/repos/{org_login}/{repo_name}/issues/{pr_number}/timeline"
)
response = requests.get(github_url, headers=self.headers)
assert response.status_code == HTTPStatus.OK
return response.json()

def get_contributors(
self, org_login: str, repo_name: str
) -> List[GitHubContributor]:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from datetime import timedelta
from typing import List
from datetime import datetime, timedelta
from typing import List, Optional

from mhq.service.code.sync.models import PRPerformance
from mhq.store.models.code import (
Expand All @@ -18,11 +18,11 @@ def create_pr_metrics(
pr: PullRequest,
pr_events: List[PullRequestEvent],
pr_commits: List[PullRequestCommit],
pr_earliest_event: Optional[datetime],
) -> PullRequest:
if pr.state == PullRequestState.OPEN:
return pr

pr_performance = self.get_pr_performance(pr, pr_events)
pr_performance = self.get_pr_performance(pr, pr_events, pr_earliest_event)

pr.first_response_time = (
pr_performance.first_review_time
Expand Down Expand Up @@ -52,7 +52,11 @@ def create_pr_metrics(
return pr

@staticmethod
def get_pr_performance(pr: PullRequest, pr_events: [PullRequestEvent]):
def get_pr_performance(
pr: PullRequest,
pr_events: [PullRequestEvent],
pr_earliest_event: Optional[datetime],
):
pr_events.sort(key=lambda x: x.created_at)
first_review = pr_events[0] if pr_events else None
approved_reviews = list(
Expand Down Expand Up @@ -86,8 +90,14 @@ def get_pr_performance(pr: PullRequest, pr_events: [PullRequestEvent]):
).total_seconds()
# Prevent garbage state when PR is approved post merging
merge_time = -1 if merge_time < 0 else merge_time
first_open_time = (
pr_earliest_event
if pr_earliest_event and isinstance(pr_earliest_event, datetime)
else pr.created_at
)

cycle_time = pr.state_changed_at - first_open_time

cycle_time = pr.state_changed_at - pr.created_at
if isinstance(cycle_time, timedelta):
cycle_time = cycle_time.total_seconds()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from mhq.store.repos.core import CoreRepoService
from mhq.utils.log import LOG
from mhq.utils.time import time_now, ISO_8601_DATE_FORMAT
from types import SimpleNamespace

PR_PROCESSING_CHUNK_SIZE = 100

Expand Down Expand Up @@ -147,7 +148,7 @@ def get_repo_pull_requests_data(
continue

pr_model, event_models, pr_commit_models = self.process_pr(
str(org_repo.id), github_pr
str(org_repo.id), github_pr, org_repo.org_name, org_repo.name
)
pull_requests.append(pr_model)
pr_events += event_models
Expand All @@ -157,7 +158,7 @@ def get_repo_pull_requests_data(
return pull_requests, pr_commits, pr_events

def process_pr(
self, repo_id: str, pr: GithubPullRequest
self, repo_id: str, pr: GithubPullRequest, org_name: str, org_repo: str
) -> Tuple[PullRequest, List[PullRequestEvent], List[PullRequestCommit]]:
pr_model: Optional[PullRequest] = self.code_repo_service.get_repo_pr_by_number(
repo_id, pr.number
Expand All @@ -172,6 +173,10 @@ def process_pr(
pr_events_model_list: List[PullRequestEvent] = self._to_pr_events(
reviews, pr_model, pr_event_model_list
)
pr_timeline_events = self._api.get_pr_timeline(org_name, org_repo, pr.number)
pr_earliest_ready_for_review = self.get_first_ready_for_review_event(
pr_timeline_events
)
if pr.merged_at:
commits: List[Dict] = list(
map(
Expand All @@ -183,11 +188,43 @@ def process_pr(
)

pr_model = self.code_etl_analytics_service.create_pr_metrics(
pr_model, pr_events_model_list, pr_commits_model_list
pr_model,
pr_events_model_list,
pr_commits_model_list,
pr_earliest_ready_for_review,
)

return pr_model, pr_events_model_list, pr_commits_model_list

def get_first_ready_for_review_event(
self, timeline: List[Dict]
) -> Optional[datetime]:
"""
Find the earliest 'ready_for_review' event from a list of PR timeline events.

Args:
timeline: List of PR timeline events

Returns:
The earliest ready_for_review event's datetime or None if no such event exists
"""
ready_events = []
for event in timeline:
if event.get("event") == "ready_for_review" and "created_at" in event:
try:
created_at = datetime.fromisoformat(
event["created_at"].replace("Z", "+00:00")
)
ready_events.append(SimpleNamespace(created_at=created_at))
except (ValueError, KeyError):
continue

if not ready_events:
return None

earliest_event = min(ready_events, key=lambda e: e.created_at)
return earliest_event.created_at.astimezone(pytz.UTC)

def get_revert_prs_mapping(
self, prs: List[PullRequest]
) -> List[PullRequestRevertPRMapping]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,23 @@
get_pull_request_commit,
)

# Updated code with None as the third argument in get_pr_performance calls


def test_pr_performance_returns_first_review_tat_for_first_review():
pr_service = CodeETLAnalyticsService()
t1 = time_now()
t2 = t1 + timedelta(hours=1)
pr = get_pull_request(created_at=t1, updated_at=t1)
pr_event = get_pull_request_event(pull_request_id=pr.id, created_at=t2)
performance = pr_service.get_pr_performance(pr, [pr_event])
performance = pr_service.get_pr_performance(pr, [pr_event], None)
assert performance.first_review_time == 3600


def test_pr_performance_returns_minus1_first_review_tat_for_no_reviews():
pr_service = CodeETLAnalyticsService()
pr = get_pull_request()
performance = pr_service.get_pr_performance(pr, [])
performance = pr_service.get_pr_performance(pr, [], None)
assert performance.first_review_time == -1


Expand All @@ -35,7 +37,7 @@ def test_pr_performance_returns_minus1_first_approved_review_tat_for_no_approved
pr_event_1 = get_pull_request_event(
pull_request_id=pr.id, state="REJECTED", created_at=t2
)
performance = pr_service.get_pr_performance(pr, [pr_event_1])
performance = pr_service.get_pr_performance(pr, [pr_event_1], None)
assert performance.merge_time == -1


Expand All @@ -46,7 +48,7 @@ def test_pr_performance_returns_merge_time_minus1_for_merged_pr_without_review()
pr = get_pull_request(
state=PullRequestState.MERGED, state_changed_at=t2, created_at=t1, updated_at=t2
)
performance = pr_service.get_pr_performance(pr, [])
performance = pr_service.get_pr_performance(pr, [], None)
assert performance.merge_time == -1


Expand All @@ -61,7 +63,7 @@ def test_pr_performance_returns_blocking_reviews():
created_at=t1,
updated_at=t2,
)
performance = pr_service.get_pr_performance(pr, [])
performance = pr_service.get_pr_performance(pr, [], None)
assert performance.blocking_reviews == 0


Expand All @@ -88,7 +90,7 @@ def test_pr_performance_returns_rework_time():
pull_request_id=pr.id, state=PullRequestEventState.APPROVED.value, created_at=t4
)
performance = pr_service.get_pr_performance(
pr, [changes_requested_1, comment2, approval]
pr, [changes_requested_1, comment2, approval], None
)

assert performance.rework_time == (t4 - t2).total_seconds()
Expand All @@ -104,7 +106,7 @@ def test_pr_performance_returns_rework_time_0_for_approved_prs():
approval = get_pull_request_event(
pull_request_id=pr.id, state=PullRequestEventState.APPROVED.value, created_at=t2
)
performance = pr_service.get_pr_performance(pr, [approval])
performance = pr_service.get_pr_performance(pr, [approval], None)

assert performance.rework_time == 0

Expand All @@ -130,7 +132,7 @@ def test_pr_performance_returns_rework_time_as_per_first_approved_prs():
pull_request_id=pr.id, state=PullRequestEventState.APPROVED.value, created_at=t4
)
performance = pr_service.get_pr_performance(
pr, [changes_requested_1, approval, approval_2]
pr, [changes_requested_1, approval, approval_2], None
)

assert performance.rework_time == (t3 - t2).total_seconds()
Expand All @@ -150,7 +152,9 @@ def test_pr_performance_returns_rework_time_for_open_prs():
approval = get_pull_request_event(
pull_request_id=pr.id, state=PullRequestEventState.APPROVED.value, created_at=t3
)
performance = pr_service.get_pr_performance(pr, [changes_requested_1, approval])
performance = pr_service.get_pr_performance(
pr, [changes_requested_1, approval], None
)

assert performance.rework_time == (t3 - t2).total_seconds()

Expand All @@ -165,7 +169,7 @@ def test_pr_performance_returns_rework_time_minus1_for_non_approved_prs():
state=PullRequestEventState.CHANGES_REQUESTED.value,
created_at=t2,
)
performance = pr_service.get_pr_performance(pr, [changes_requested_1])
performance = pr_service.get_pr_performance(pr, [changes_requested_1], None)

assert performance.rework_time == -1

Expand All @@ -176,7 +180,7 @@ def test_pr_performance_returns_rework_time_minus1_for_merged_prs_without_review
pr = get_pull_request(
state=PullRequestState.MERGED, state_changed_at=t1, created_at=t1, updated_at=t1
)
performance = pr_service.get_pr_performance(pr, [])
performance = pr_service.get_pr_performance(pr, [], None)

assert performance.rework_time == -1

Expand All @@ -188,15 +192,15 @@ def test_pr_performance_returns_cycle_time_for_merged_pr():
pr = get_pull_request(
state=PullRequestState.MERGED, state_changed_at=t2, created_at=t1, updated_at=t2
)
performance = pr_service.get_pr_performance(pr, [])
performance = pr_service.get_pr_performance(pr, [], None)

assert performance.cycle_time == 86400


def test_pr_performance_returns_cycle_time_minus1_for_non_merged_pr():
pr_service = CodeETLAnalyticsService()
pr = get_pull_request()
performance = pr_service.get_pr_performance(pr, [])
performance = pr_service.get_pr_performance(pr, [], None)

assert performance.cycle_time == -1

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,42 @@ def test__to_pr_commits_given_a_list_of_commits_returns_a_list_of_pr_commits():
assert compare_objects_as_dicts(commit, expected_commit) is True


def test__dt_from_github_dt_string_given_date_string_returns_correct_datetime():
date_string = "2024-04-18T10:53:15Z"
expected = datetime(2024, 4, 18, 10, 53, 15, tzinfo=pytz.UTC)
assert GithubETLHandler._dt_from_github_dt_string(date_string) == expected
def test_get_first_ready_for_review_event_returns_earliest_ready_event():
earlier_date = datetime(2024, 1, 1, 10, 0, 0, tzinfo=pytz.UTC)
later_date = datetime(2024, 1, 2, 10, 0, 0, tzinfo=pytz.UTC)

events = [
{"event": "other_event", "created_at": earlier_date.isoformat()},
{"event": "ready_for_review", "created_at": later_date.isoformat()},
{"event": "ready_for_review", "created_at": earlier_date.isoformat()},
{"event": "another_event", "created_at": later_date.isoformat()},
]

github_etl_handler = GithubETLHandler(ORG_ID, None, None, None, None)
result = github_etl_handler.get_first_ready_for_review_event(events)

# The method should parse the string date back to a datetime object
expected_date = datetime.fromisoformat(
earlier_date.isoformat().replace("Z", "+00:00")
)
assert result == expected_date


def test_get_first_ready_for_review_event_returns_none_when_no_ready_events():
date = datetime(2024, 1, 1, 10, 0, 0, tzinfo=pytz.UTC)
events = [
{"event": "other_event", "created_at": date.isoformat()},
{"event": "another_event", "created_at": date.isoformat()},
]

github_etl_handler = GithubETLHandler(ORG_ID, None, None, None, None)
result = github_etl_handler.get_first_ready_for_review_event(events)

assert result is None


def test_get_first_ready_for_review_event_handles_empty_list():
github_etl_handler = GithubETLHandler(ORG_ID, None, None, None, None)
result = github_etl_handler.get_first_ready_for_review_event([])

assert result is None