56 changes: 56 additions & 0 deletions augur/api/metrics/repo_meta.py
@@ -1240,3 +1240,59 @@ def aggregate_summary(repo_group_id, repo_id=None, begin_date=None, end_date=Non
results = pd.read_sql(summarySQL, conn, params={'repo_id': repo_id,
'begin_date': begin_date, 'end_date': end_date})
return results

@register_metric()
def clones(repo_group_id, repo_id=None, begin_date=None, end_date=None):
Contributor:

It looks like the Clones metric API you implemented for #3247 got included in this PR.

Feature implementations and bugfixes for existing infrastructure should be made into separate PRs. Please rebase and update the PR to only include the error handling changes.

"""
Returns the number of repository clones (total and unique) for a given repo or repo group.
:param repo_group_id: The repository's repo_group_id
:param repo_id: The repository's repo_id, defaults to None
:param begin_date: Start date for filtering clone data (optional)
:param end_date: End date for filtering clone data (optional)
:return: DataFrame of clone counts (total and unique) per day
"""
if not begin_date:
begin_date = '1970-1-1 00:00:00'
if not end_date:
end_date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

if repo_id:
clones_sql = s.sql.text("""
SELECT
repo_id,
clone_data_timestamp AS date,
count_clones AS total_clones,
unique_clones
FROM augur_data.repo_clones_data
WHERE repo_id = :repo_id
AND clone_data_timestamp BETWEEN :begin_date AND :end_date
ORDER BY clone_data_timestamp
""")
with current_app.engine.connect() as conn:
results = pd.read_sql(clones_sql, conn, params={
'repo_id': repo_id,
'begin_date': begin_date,
'end_date': end_date
})
return results
else:
clones_sql = s.sql.text("""
SELECT
repo_id,
clone_data_timestamp AS date,
count_clones AS total_clones,
unique_clones
FROM augur_data.repo_clones_data
WHERE repo_id IN (
SELECT repo_id FROM augur_data.repo WHERE repo_group_id = :repo_group_id
)
AND clone_data_timestamp BETWEEN :begin_date AND :end_date
ORDER BY repo_id, clone_data_timestamp
""")
with current_app.engine.connect() as conn:
results = pd.read_sql(clones_sql, conn, params={
'repo_group_id': repo_group_id,
'begin_date': begin_date,
'end_date': end_date
})
return results
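
As context for reviewing the metric itself, here is a minimal sketch of how it might be exercised once registered; it assumes an initialized Augur Flask application context (so that current_app.engine is available), and the repo_group_id, repo_id, and dates are placeholder values, not real ones:

from augur.api.metrics.repo_meta import clones

# Clone counts for a single repository over one year.
# repo_group_id=10 and repo_id=25192 are placeholders.
single_repo = clones(10, repo_id=25192,
                     begin_date='2023-01-01 00:00:00',
                     end_date='2023-12-31 23:59:59')
print(single_repo[['date', 'total_clones', 'unique_clones']].head())

# Clone counts for every repository in a repo group (repo_id omitted).
group_wide = clones(10)
print(group_wide.groupby('repo_id')['total_clones'].sum())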
37 changes: 27 additions & 10 deletions augur/tasks/github/detect_move/core.py
@@ -46,17 +46,32 @@ def extract_owner_and_repo_from_endpoint(key_auth, url, logger):

def ping_github_for_repo_move(session, key_auth, repo, logger,collection_hook='core'):

owner, name = get_owner_repo(repo.repo_git)
url = f"https://api.github.com/repos/{owner}/{name}"
try:
owner, name = get_owner_repo(repo.repo_git)
url = f"https://api.github.com/repos/{owner}/{name}"
except Exception as e:
Contributor:

Where possible and appropriate, please try to catch a specific exception class instead of a bare Exception.

logger.error(f"Failed to parse repo URL {repo.repo_git}: {str(e)}")
raise Exception(f"Invalid repository URL format: {repo.repo_git}")
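
As a concrete illustration of that suggestion, a minimal sketch of the same block with a narrower catch; it assumes get_owner_repo raises ValueError on a malformed URL, which should be verified against its actual implementation:

try:
    owner, name = get_owner_repo(repo.repo_git)
    url = f"https://api.github.com/repos/{owner}/{name}"
except ValueError as e:  # assumed failure mode of get_owner_repo; adjust to whatever it actually raises
    logger.error(f"Failed to parse repo URL {repo.repo_git}: {e}")
    raise ValueError(f"Invalid repository URL format: {repo.repo_git}") from e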

attempts = 0
while attempts < 10:
response_from_gh = hit_api(key_auth, url, logger)
try:
response_from_gh = hit_api(key_auth, url, logger)

if response_from_gh and response_from_gh.status_code != 404:
break

if response_from_gh and response_from_gh.status_code != 404:
break
attempts += 1
except Exception as e:
logger.warning(f"API call attempt {attempts + 1} failed for {url}: {str(e)}")
attempts += 1
if attempts >= 10:
raise Exception(f"Failed to get API response after {attempts} attempts: {str(e)}")

attempts += 1
# Validate response
if not response_from_gh:
logger.error(f"No response received from GitHub API for {url}")
raise Exception(f"No response from GitHub API for {url}")

#Update Url and retry if 301
#301 moved permanently
@@ -79,9 +94,10 @@ def ping_github_for_repo_move(session, key_auth, repo, logger,collection_hook='c

update_repo_with_dict(repo, repo_update_dict, logger)

logger.info(f"Repository {repo.repo_git} has moved to {repo_update_dict['repo_git']}. Updated repository URL and resetting collection.")
raise Exception("ERROR: Repo has moved! Resetting Collection!")

#Mark as ignore if 404
#Mark as ignore if 404 (repository deleted)
if response_from_gh.status_code == 404:
repo_update_dict = {
'repo_git': repo.repo_git,
@@ -113,14 +129,15 @@ def ping_github_for_repo_move(session, key_auth, repo, logger,collection_hook='c
collectionRecord.ml_task_id = None
collectionRecord.ml_data_last_collected = datetime.today().strftime('%Y-%m-%dT%H:%M:%SZ')


session.commit()
raise Exception("ERROR: Repo has moved, and there is no redirection! 404 returned, not 301. Resetting Collection!")

logger.warning(f"Repository {repo.repo_git} returned 404 (deleted). Marked as IGNORE and collection stopped.")
return # Return gracefully instead of raising exception


if attempts >= 10:
logger.error(f"Could not check if repo moved because the api timed out 10 times. Url: {url}")
raise Exception(f"ERROR: Could not get api response for repo: {url}")
raise Exception(f"ERROR: Could not get api response for repo: {url} after {attempts} attempts")

#skip if not 404
logger.info(f"Repo found at url: {url}")
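
For reference, a self-contained sketch of the bounded-retry pattern this hunk builds, with the narrower exception handling the reviewer asked for; it assumes hit_api is implemented on top of requests (so requests.exceptions.RequestException is the relevant class to catch), which should be confirmed, and it omits the 301/404 branching the real function performs after the loop:

import requests

MAX_ATTEMPTS = 10  # same bound used in ping_github_for_repo_move

def fetch_repo_with_retries(key_auth, url, logger):
    """Call hit_api (the existing Augur helper used in this module) up to
    MAX_ATTEMPTS times and return the first usable response."""
    last_error = None
    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            response = hit_api(key_auth, url, logger)
            if response and response.status_code != 404:
                return response
        except requests.exceptions.RequestException as e:  # assumed failure mode of hit_api
            last_error = e
            logger.warning(f"API call attempt {attempt} failed for {url}: {e}")
    raise RuntimeError(
        f"Could not get API response for {url} after {MAX_ATTEMPTS} attempts"
    ) from last_error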
54 changes: 35 additions & 19 deletions augur/tasks/github/detect_move/tasks.py
@@ -14,17 +14,25 @@ def detect_github_repo_move_core(repo_git : str) -> None:

logger.info(f"Starting repo_move operation with {repo_git}")

repo = get_repo_by_repo_git(repo_git)
try:
repo = get_repo_by_repo_git(repo_git)

if not repo:
logger.error(f"Repository not found in database: {repo_git}")
return

logger.info(f"Pinging repo: {repo_git}")
logger.info(f"Pinging repo: {repo_git}")

key_auth = GithubRandomKeyAuth(logger)
key_auth = GithubRandomKeyAuth(logger)

with get_session() as session:

#Ping each repo with the given repo_git to make sure
#that they are still in place.
ping_github_for_repo_move(session, key_auth, repo, logger)
with get_session() as session:
#Ping each repo with the given repo_git to make sure
#that they are still in place.
ping_github_for_repo_move(session, key_auth, repo, logger)

except Exception as e:
logger.error(f"Error during repo move detection for {repo_git}: {str(e)}")
raise


@celery.task(base=AugurSecondaryRepoCollectionTask)
@@ -34,14 +42,22 @@ def detect_github_repo_move_secondary(repo_git : str) -> None:

logger.info(f"Starting repo_move operation with {repo_git}")

repo = get_repo_by_repo_git(repo_git)

logger.info(f"Pinging repo: {repo_git}")

key_auth = GithubRandomKeyAuth(logger)

with get_session() as session:

#Ping each repo with the given repo_git to make sure
#that they are still in place.
ping_github_for_repo_move(session, key_auth, repo, logger,collection_hook='secondary')
try:
repo = get_repo_by_repo_git(repo_git)

if not repo:
logger.error(f"Repository not found in database: {repo_git}")
return

logger.info(f"Pinging repo: {repo_git}")

key_auth = GithubRandomKeyAuth(logger)

with get_session() as session:
#Ping each repo with the given repo_git to make sure
#that they are still in place.
ping_github_for_repo_move(session, key_auth, repo, logger,collection_hook='secondary')

except Exception as e:
logger.error(f"Error during repo move detection for {repo_git}: {str(e)}")
raise