Skip to content
Open
3 changes: 2 additions & 1 deletion augur/application/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ def get_development_flag():
"pull_repos": 1,
"rebuild_caches": 1,
"run_analysis": 1,
"run_facade_contributors": 1
"run_facade_contributors": 1,
"facade_contributor_full_recollect": 1
},
"Server": {
"cache_expire": "3600",
Expand Down
15 changes: 2 additions & 13 deletions augur/tasks/git/facade_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,6 @@
repo = repo = get_repo_by_repo_git(repo_git)
repo_id = repo.repo_id

start_date = facade_helper.get_setting('start_date')

logger.info(f"Generating sequence for repo {repo_id}")

repo = get_repo_by_repo_git(repo_git)
Expand All @@ -123,7 +121,7 @@
repo_loc = (f"{absolute_path}/.git")
# Grab the parents of HEAD

parent_commits = get_parent_commits_set(repo_loc, start_date)
parent_commits = get_parent_commits_set(repo_loc)

# Grab the existing commits from the database
existing_commits = get_existing_commits_set(repo_id)
Expand Down Expand Up @@ -237,8 +235,6 @@
repo = get_repo_by_repo_git(repo_git)
repo_id = repo.repo_id

start_date = facade_helper.get_setting('start_date')

logger.info(f"Generating sequence for repo {repo_id}")

repo = get_repo_by_repo_id(repo_id)
Expand All @@ -248,7 +244,7 @@
repo_loc = (f"{absolute_path}/.git")
# Grab the parents of HEAD

parent_commits = get_parent_commits_set(repo_loc, start_date)
parent_commits = get_parent_commits_set(repo_loc)

# Grab the existing commits from the database
existing_commits = get_existing_commits_set(repo_id)
Expand All @@ -259,7 +255,7 @@
facade_helper.log_activity('Debug',f"Commits missing from repo {repo_id}: {len(missing_commits)}")


if not len(missing_commits) or repo_id is None:

Check warning on line 258 in augur/tasks/git/facade_tasks.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 C1802: Do not use `len(SEQUENCE)` without comparison to determine if a sequence is empty (use-implicit-booleaness-not-len) Raw Output: augur/tasks/git/facade_tasks.py:258:7: C1802: Do not use `len(SEQUENCE)` without comparison to determine if a sequence is empty (use-implicit-booleaness-not-len)
#session.log_activity('Info','Type of missing_commits: %s' % type(missing_commits))
return

Expand Down Expand Up @@ -438,11 +434,6 @@

analysis_sequence = []

#repo_list = s.sql.text("""SELECT repo_id,repo_group_id,repo_path,repo_name FROM repo WHERE repo_git=:value""").bindparams(value=repo_git)
#repos = fetchall_data_from_sql_text(repo_list)

start_date = facade_helper.get_setting('start_date')

#repo_ids = [repo['repo_id'] for repo in repos]

#repo_id = repo_ids.pop(0)
Expand Down Expand Up @@ -473,8 +464,6 @@
#repo_list = s.sql.text("""SELECT repo_id,repo_group_id,repo_path,repo_name FROM repo WHERE repo_git=:value""").bindparams(value=repo_git)
#repos = fetchall_data_from_sql_text(repo_list)

start_date = facade_helper.get_setting('start_date')

#repo_ids = [repo['repo_id'] for repo in repos]

#repo_id = repo_ids.pop(0)
Expand Down
12 changes: 12 additions & 0 deletions augur/tasks/git/util/facade_worker/facade_worker/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ def __init__(self,logger: Logger):
self.rebuild_caches = worker_options["rebuild_caches"]
self.multithreaded = worker_options["multithreaded"]
self.create_xlsx_summary_files = worker_options["create_xlsx_summary_files"]
self.facade_contributor_full_recollect = worker_options["facade_contributor_full_recollect"]

self.tool_source = "Facade"
self.data_source = "Git Log"
Expand Down Expand Up @@ -244,6 +245,17 @@ def insert_or_update_data(self, query, **bind_args)-> None:
return
# Increment this object's `repos_processed` counter by one; returns nothing.
def inc_repos_processed(self):
self.repos_processed += 1

# def get_last_collected_commit_date(self,repo_id):
# commit_date_query = s.sql.text("""
# SELECT cmt_committer_timestamp FROM commits
# WHERE repo_id=:repo_id
# ORDER BY data_collection_date DESC
# LIMIT 1;
# """).bindparams(repo_id=repo_id)
#
# result = execute_sql(commit_date_query).fetchone()
# return result[0]

"""
class FacadeConfig:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,10 +105,10 @@ def get_absolute_repo_path(repo_base_dir, repo_id, repo_path,repo_name):

return f"{repo_base_dir}{repo_id}-{repo_path}/{repo_name}"

def get_parent_commits_set(absolute_repo_path, start_date):
def get_parent_commits_set(absolute_repo_path):

parents = subprocess.Popen(["git --git-dir %s log --ignore-missing "
"--pretty=format:'%%H' --since=%s" % (absolute_repo_path,start_date)],
"--pretty=format:'%%H'" % (absolute_repo_path)],
stdout=subprocess.PIPE, shell=True)

parent_commits = set(parents.stdout.read().decode("utf-8",errors="ignore").split(os.linesep))
Expand Down
16 changes: 13 additions & 3 deletions augur/tasks/github/facade_github/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from augur.application.db.models import Contributor
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[pylint] reported by reviewdog 🐶
W0611: Unused Contributor imported from augur.application.db.models (unused-import)

from augur.tasks.github.facade_github.core import *
from augur.application.db.lib import execute_sql, get_contributor_aliases_by_email, get_unresolved_commit_emails_by_name, get_contributors_by_full_name, get_repo_by_repo_git, batch_insert_contributors
from augur.application.db.lib import get_session, execute_session_query
from augur.tasks.git.util.facade_worker.facade_worker.facade00mainprogram import *


Expand Down Expand Up @@ -134,7 +135,7 @@
insert_alias(logger, cntrb, emailFromCommitData)
except LookupError as e:
logger.error(
''.join(traceback.format_exception(None, e, e.__traceback__)))

Check warning on line 138 in augur/tasks/github/facade_github/tasks.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 E0602: Undefined variable 'traceback' (undefined-variable) Raw Output: augur/tasks/github/facade_github/tasks.py:138:24: E0602: Undefined variable 'traceback' (undefined-variable)

Check warning on line 138 in augur/tasks/github/facade_github/tasks.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 E1101: Class 'traceback' has no 'format_exception' member (no-member) Raw Output: augur/tasks/github/facade_github/tasks.py:138:24: E1101: Class 'traceback' has no 'format_exception' member (no-member)
logger.error(
f"Contributor id not able to be found in database despite the user_id existing. Something very wrong is happening. Error: {e}")
return
Expand Down Expand Up @@ -198,6 +199,12 @@
logger = logging.getLogger(insert_facade_contributors.__name__)
repo = get_repo_by_repo_git(repo_git)
repo_id = repo.repo_id
facade_helper = FacadeHelper(logger)

with get_session() as session:
query = session.query(CollectionStatus).filter(CollectionStatus.repo_id == repo.repo_id)
collection_status = execute_session_query(query,'one')
last_collected_date = collection_status.facade_data_last_collected if not facade_helper.facade_contributor_full_recollect else None

# Get all of the commit data's emails and names from the commit table that do not appear
# in the contributors table or the contributors_aliases table.
Expand All @@ -214,6 +221,7 @@
commits
WHERE
commits.repo_id = :repo_id
AND (:since_date is NULL OR commits.data_collection_date > :since_date)
AND (NOT EXISTS ( SELECT contributors.cntrb_canonical FROM contributors WHERE contributors.cntrb_canonical = commits.cmt_author_raw_email )
or NOT EXISTS ( SELECT contributors_aliases.alias_email from contributors_aliases where contributors_aliases.alias_email = commits.cmt_author_raw_email)
AND ( commits.cmt_author_name ) IN ( SELECT C.cmt_author_name FROM commits AS C WHERE C.repo_id = :repo_id GROUP BY C.cmt_author_name ))
Expand All @@ -231,6 +239,7 @@
commits
WHERE
commits.repo_id = :repo_id
AND (:since_date is NULL OR commits.data_collection_date > :since_date)
AND EXISTS ( SELECT unresolved_commit_emails.email FROM unresolved_commit_emails WHERE unresolved_commit_emails.email = commits.cmt_author_raw_email )
AND ( commits.cmt_author_name ) IN ( SELECT C.cmt_author_name FROM commits AS C WHERE C.repo_id = :repo_id GROUP BY C.cmt_author_name )
GROUP BY
Expand All @@ -239,7 +248,7 @@
commits.cmt_author_raw_email
ORDER BY
hash
""").bindparams(repo_id=repo_id)
""").bindparams(repo_id=repo_id,since_date=last_collected_date)

#Execute statement with session.
result = execute_sql(new_contrib_sql)
Expand All @@ -257,7 +266,6 @@

logger.debug("DEBUG: Got through the new_contribs")

facade_helper = FacadeHelper(logger)
# sql query used to find corresponding cntrb_id's of emails found in the contributor's table
# i.e., if a contributor already exists, we use it!
resolve_email_to_cntrb_id_sql = s.sql.text("""
Expand All @@ -271,6 +279,7 @@
commits
WHERE
contributors.cntrb_canonical = commits.cmt_author_raw_email
AND (:since_date is NULL OR commits.data_collection_date > :since_date)
AND commits.repo_id = :repo_id
UNION
SELECT DISTINCT
Expand All @@ -286,7 +295,8 @@
contributors_aliases.alias_email = commits.cmt_author_raw_email
AND contributors.cntrb_id = contributors_aliases.cntrb_id
AND commits.repo_id = :repo_id
""").bindparams(repo_id=repo_id)
AND (:since_date is NULL OR commits.data_collection_date > :since_date)
""").bindparams(repo_id=repo_id,since_date=last_collected_date)


result = execute_sql(resolve_email_to_cntrb_id_sql)
Expand Down
Loading