Skip to content

Fix PR cycle time calculation for draft PRs #636

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 9 commits into from
8 changes: 8 additions & 0 deletions backend/analytics_server/mhq/exapi/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,14 @@ def get_pr_commits(self, pr: GithubPullRequest):

def get_pr_reviews(self, pr: GithubPullRequest) -> GithubPaginatedList:
return pr.get_reviews()

def get_pr_timeline(self, org_login:str,repo_name:str, pr_number:int):
github_url = f"{self.base_url}/repos/{org_login}/{repo_name}/issues/{pr_number}/timeline"
response = requests.get(
github_url, headers=self.headers
)
assert response.status_code == HTTPStatus.OK
return response.json()

def get_contributors(
self, org_login: str, repo_name: str
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from datetime import timedelta
from typing import List
from datetime import datetime,timedelta,timezone
from typing import List,Optional

from mhq.service.code.sync.models import PRPerformance
from mhq.store.models.code import (
Expand All @@ -10,6 +10,7 @@
PullRequestState,
)
from mhq.utils.time import Interval
from mhq.utils.log import LOG


class CodeETLAnalyticsService:
Expand All @@ -18,11 +19,12 @@ def create_pr_metrics(
pr: PullRequest,
pr_events: List[PullRequestEvent],
pr_commits: List[PullRequestCommit],
pr_earliest_event:Optional[datetime]
) -> PullRequest:
if pr.state == PullRequestState.OPEN:
return pr

pr_performance = self.get_pr_performance(pr, pr_events)
pr_performance = self.get_pr_performance(pr, pr_events,pr_earliest_event)

pr.first_response_time = (
pr_performance.first_review_time
Expand Down Expand Up @@ -52,7 +54,10 @@ def create_pr_metrics(
return pr

@staticmethod
def get_pr_performance(pr: PullRequest, pr_events: [PullRequestEvent]):
def get_pr_performance(pr: PullRequest, pr_events: [PullRequestEvent],pr_earliest_event:Optional[datetime]):
LOG.info(
f"Calculating PR performance for {pr.id} with {len(pr_events)} events"
)
pr_events.sort(key=lambda x: x.created_at)
first_review = pr_events[0] if pr_events else None
approved_reviews = list(
Expand Down Expand Up @@ -86,8 +91,21 @@ def get_pr_performance(pr: PullRequest, pr_events: [PullRequestEvent]):
).total_seconds()
# Prevent garbage state when PR is approved post merging
merge_time = -1 if merge_time < 0 else merge_time
first_open_time = (
pr_earliest_event
if pr_earliest_event and isinstance(pr_earliest_event, datetime)
else pr.created_at
)

cycle_time = pr.state_changed_at - first_open_time

LOG.info(
f"PR {pr.id} performance: "
f"rework_time={rework_time}, "
f"merge_time={merge_time}, "
f"cycle_time={cycle_time}"
)

cycle_time = pr.state_changed_at - pr.created_at
if isinstance(cycle_time, timedelta):
cycle_time = cycle_time.total_seconds()

Expand Down Expand Up @@ -172,4 +190,4 @@ def get_rework_cycles(
if isinstance(next_event, PullRequestCommit):
rework_cycles += 1

return rework_cycles
return rework_cycles
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from mhq.store.repos.core import CoreRepoService
from mhq.utils.log import LOG
from mhq.utils.time import time_now, ISO_8601_DATE_FORMAT
from types import SimpleNamespace

PR_PROCESSING_CHUNK_SIZE = 100

Expand Down Expand Up @@ -147,7 +148,7 @@ def get_repo_pull_requests_data(
continue

pr_model, event_models, pr_commit_models = self.process_pr(
str(org_repo.id), github_pr
str(org_repo.id), github_pr, org_repo.org_name, org_repo.name
)
pull_requests.append(pr_model)
pr_events += event_models
Expand All @@ -157,7 +158,7 @@ def get_repo_pull_requests_data(
return pull_requests, pr_commits, pr_events

def process_pr(
self, repo_id: str, pr: GithubPullRequest
self, repo_id: str, pr: GithubPullRequest, org_name:str, org_repo:str
) -> Tuple[PullRequest, List[PullRequestEvent], List[PullRequestCommit]]:
pr_model: Optional[PullRequest] = self.code_repo_service.get_repo_pr_by_number(
repo_id, pr.number
Expand All @@ -172,6 +173,8 @@ def process_pr(
pr_events_model_list: List[PullRequestEvent] = self._to_pr_events(
reviews, pr_model, pr_event_model_list
)
pr_timeline_events=self._api.get_pr_timeline(org_name,org_repo,pr.number)
pr_earliest_ready_for_review= self.get_first_ready_for_review_event(pr_timeline_events)
if pr.merged_at:
commits: List[Dict] = list(
map(
Expand All @@ -183,11 +186,41 @@ def process_pr(
)

pr_model = self.code_etl_analytics_service.create_pr_metrics(
pr_model, pr_events_model_list, pr_commits_model_list
pr_model, pr_events_model_list, pr_commits_model_list, pr_earliest_ready_for_review
)

return pr_model, pr_events_model_list, pr_commits_model_list


def get_first_ready_for_review_event(self, timeline: List[Dict]) -> Optional[datetime]:
"""
Find the earliest 'ready_for_review' event from a list of PR timeline events.

Args:
timeline: List of PR timeline events

Returns:
The earliest ready_for_review event's datetime or None if no such event exists
"""
ready_events = []
for event in timeline:
if event.get("event") == "ready_for_review" and "created_at" in event:
try:
created_at = datetime.fromisoformat(
event["created_at"].replace("Z", "+00:00")
)
ready_events.append(SimpleNamespace(created_at=created_at))
except (ValueError, KeyError):
continue

if not ready_events:
return None

earliest_event = min(ready_events, key=lambda e: e.created_at)
LOG.info(
f"Found earliest ready_for_review event: {earliest_event.created_at}"
)
return earliest_event.created_at.astimezone(pytz.UTC)

def get_revert_prs_mapping(
self, prs: List[PullRequest]
) -> List[PullRequestRevertPRMapping]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,49 @@ def test__to_pr_commits_given_a_list_of_commits_returns_a_list_of_pr_commits():
for commit, expected_commit in zip(pr_commits, expected_pr_commits):
assert compare_objects_as_dicts(commit, expected_commit) is True

def test_get_first_ready_for_review_event_returns_earliest_ready_event():
class MockTimelineEvent:
def __init__(self, event_type, created_at):
self.event = event_type
self.created_at = created_at

earlier_date = datetime(2024, 1, 1, 10, 0, 0, tzinfo=pytz.UTC)
later_date = datetime(2024, 1, 2, 10, 0, 0, tzinfo=pytz.UTC)

events = [
MockTimelineEvent("other_event", earlier_date),
MockTimelineEvent("ready_for_review", later_date),
MockTimelineEvent("ready_for_review", earlier_date),
MockTimelineEvent("another_event", later_date),
]

github_etl_handler = GithubETLHandler(ORG_ID, None, None, None, None)
result = github_etl_handler.get_first_ready_for_review_event(events)

assert result == earlier_date

def test_get_first_ready_for_review_event_returns_none_when_no_ready_events():
class MockTimelineEvent:
def __init__(self, event_type, created_at):
self.event = event_type
self.created_at = created_at

date = datetime(2024, 1, 1, 10, 0, 0, tzinfo=pytz.UTC)
events = [
MockTimelineEvent("other_event", date),
MockTimelineEvent("another_event", date),
]

github_etl_handler = GithubETLHandler(ORG_ID, None, None, None, None)
result = github_etl_handler.get_first_ready_for_review_event(events)

assert result is None

def test_get_first_ready_for_review_event_handles_empty_list():
github_etl_handler = GithubETLHandler(ORG_ID, None, None, None, None)
result = github_etl_handler.get_first_ready_for_review_event([])

assert result is None

def test__dt_from_github_dt_string_given_date_string_returns_correct_datetime():
date_string = "2024-04-18T10:53:15Z"
Expand Down
Loading