-
Notifications
You must be signed in to change notification settings - Fork 907
feat: contributor metric #3213
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
feat: contributor metric #3213
Changes from all commits
bc9a5e0
f936e01
bdaa65e
245ae6b
611a911
91731ac
91a2bf8
299bd90
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3600,4 +3600,50 @@ class RepoClone(Base): | |
count_clones = Column(BigInteger) | ||
clone_data_timestamp = Column(TIMESTAMP(precision=6)) | ||
|
||
repo = relationship("Repo") | ||
repo = relationship("Repo") | ||
|
||
class ContributorEngagement(Base):
    """ORM mapping for the ``augur_data.contributor_engagement`` table.

    Each row links a repository and a contributor and records engagement
    signals grouped into three levels: D0 (forks, stars/watches), D1 (first
    issue / PR / PR comment timestamps) and D2 (merged PRs, issue and comment
    volume), followed by the usual provenance metadata columns.
    """

    __tablename__ = "contributor_engagement"

    # Review thread attached to this model in the PR discussion:
    # - Question: is it project practice to leave the main table-creation
    #   script untouched and ship only a versioned migration, so that new
    #   installs simply receive "all the upgrades"?
    # - Reply: in other projects both the main schema AND a migration were
    #   changed; a new database starts on the latest schema, is stamped with
    #   the current Alembic version, and is upgraded from there over time.
    __table_args__ = {"schema": "augur_data"}

    # Surrogate key; the default is drawn from a schema-qualified sequence.
    engagement_id = Column(
        BigInteger,
        primary_key=True,
        server_default=text(
            "nextval('augur_data.contributor_engagement_engagement_id_seq'::regclass)"
        ),
    )

    # Required links to the repository and the contributor.
    repo_id = Column(
        ForeignKey("augur_data.repo.repo_id"),
        nullable=False,
    )
    cntrb_id = Column(
        ForeignKey("augur_data.contributors.cntrb_id"),
        nullable=False,
    )

    # Descriptive contributor fields; only ``username`` is mandatory.
    username = Column(String, nullable=False)
    full_name = Column(String)
    country = Column(String)
    platform = Column(String)

    # --- D0: basic engagement -------------------------------------------
    d0_forked = Column(Boolean, server_default=text("false"))
    d0_starred_or_watched = Column(Boolean, server_default=text("false"))
    d0_engagement_timestamp = Column(TIMESTAMP(precision=6))

    # --- D1: issue / review engagement ----------------------------------
    d1_first_issue_created_at = Column(TIMESTAMP(precision=6))
    d1_first_pr_opened_at = Column(TIMESTAMP(precision=6))
    d1_first_pr_commented_at = Column(TIMESTAMP(precision=6))

    # --- D2: significant contributions ----------------------------------
    d2_has_merged_pr = Column(Boolean, server_default=text("false"))
    d2_created_many_issues = Column(Boolean, server_default=text("false"))
    d2_total_comments = Column(BigInteger, server_default=text("0"))
    d2_has_pr_with_many_commits = Column(Boolean, server_default=text("false"))
    d2_commented_on_multiple_prs = Column(Boolean, server_default=text("false"))

    # --- Provenance metadata --------------------------------------------
    tool_source = Column(String)
    tool_version = Column(String)
    data_source = Column(String)
    data_collection_date = Column(
        TIMESTAMP(precision=6),
        server_default=text("CURRENT_TIMESTAMP"),
    )

    # Navigation to the related ORM entities.
    repo = relationship("Repo")
    contributor = relationship("Contributor")
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
"""Add contributor_engagement table | ||
|
||
Revision ID: 35 | ||
Revises: 33 | ||
Create Date: 2025-07-26 10:00:00.000000 | ||
|
||
""" | ||
|
||
import sqlalchemy as sa | ||
from alembic import op | ||
from sqlalchemy.dialects import postgresql | ||
|
||
# Alembic revision identifiers: this migration's id and its parent revision.
# NOTE(review): the parent is "33", so "34" is skipped - presumably it is
# taken by another open PR; confirm the history still has a single head.
revision = "35"
down_revision = "33"

# This revision declares no branch labels and no cross-revision dependencies.
branch_labels = None
depends_on = None
|
||
|
||
def upgrade():
    """Create the ``augur_data.contributor_engagement`` table.

    The column list, order, nullability and server defaults follow the
    ``ContributorEngagement`` ORM model. Timestamp columns are declared with
    ``postgresql.TIMESTAMP(precision=6)`` so the migration states the same
    type as the model's ``TIMESTAMP(precision=6)`` instead of an
    unparameterised generic ``TIMESTAMP``.
    """
    op.create_table(
        "contributor_engagement",
        # Auto-incrementing surrogate key.
        # NOTE(review): the ORM model's default reads from
        # augur_data.contributor_engagement_engagement_id_seq - confirm the
        # sequence produced for this column carries that name.
        sa.Column("engagement_id", sa.BigInteger(), autoincrement=True, nullable=False),
        sa.Column("repo_id", sa.BigInteger(), nullable=False),
        sa.Column("cntrb_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("username", sa.String(), nullable=False),
        sa.Column("full_name", sa.String(), nullable=True),
        sa.Column("country", sa.String(), nullable=True),
        sa.Column("platform", sa.String(), nullable=True),
        # D0 Level - Basic Engagement
        sa.Column(
            "d0_forked",
            sa.Boolean(),
            server_default=sa.text("false"),
            nullable=True,
        ),
        sa.Column(
            "d0_starred_or_watched",
            sa.Boolean(),
            server_default=sa.text("false"),
            nullable=True,
        ),
        sa.Column(
            "d0_engagement_timestamp",
            postgresql.TIMESTAMP(precision=6),
            nullable=True,
        ),
        # D1 Level - Issue/Review Engagement
        sa.Column(
            "d1_first_issue_created_at",
            postgresql.TIMESTAMP(precision=6),
            nullable=True,
        ),
        sa.Column(
            "d1_first_pr_opened_at",
            postgresql.TIMESTAMP(precision=6),
            nullable=True,
        ),
        sa.Column(
            "d1_first_pr_commented_at",
            postgresql.TIMESTAMP(precision=6),
            nullable=True,
        ),
        # D2 Level - Significant Contributions
        sa.Column(
            "d2_has_merged_pr",
            sa.Boolean(),
            server_default=sa.text("false"),
            nullable=True,
        ),
        sa.Column(
            "d2_created_many_issues",
            sa.Boolean(),
            server_default=sa.text("false"),
            nullable=True,
        ),
        sa.Column(
            "d2_total_comments",
            sa.BigInteger(),
            server_default=sa.text("0"),
            nullable=True,
        ),
        sa.Column(
            "d2_has_pr_with_many_commits",
            sa.Boolean(),
            server_default=sa.text("false"),
            nullable=True,
        ),
        sa.Column(
            "d2_commented_on_multiple_prs",
            sa.Boolean(),
            server_default=sa.text("false"),
            nullable=True,
        ),
        # Metadata
        sa.Column("tool_source", sa.String(), nullable=True),
        sa.Column("tool_version", sa.String(), nullable=True),
        sa.Column("data_source", sa.String(), nullable=True),
        sa.Column(
            "data_collection_date",
            postgresql.TIMESTAMP(precision=6),
            server_default=sa.text("CURRENT_TIMESTAMP"),
            nullable=True,
        ),
        # Constraints: explicitly named foreign keys, then the primary key.
        sa.ForeignKeyConstraint(
            ["repo_id"],
            ["augur_data.repo.repo_id"],
            name="fk_contributor_engagement_repo",
        ),
        sa.ForeignKeyConstraint(
            ["cntrb_id"],
            ["augur_data.contributors.cntrb_id"],
            name="fk_contributor_engagement_contributors",
        ),
        sa.PrimaryKeyConstraint("engagement_id"),
        schema="augur_data",
    )
|
||
|
||
def downgrade():
    """Revert this revision by dropping ``augur_data.contributor_engagement``."""
    target_schema = "augur_data"
    op.drop_table("contributor_engagement", schema=target_schema)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
""" | ||
Contributor Engagement Tasks | ||
|
||
This module contains tasks for collecting and processing contributor engagement data | ||
at different levels (D0, D1, D2) as defined in the CHAOSS metrics. | ||
|
||
D0: Basic engagement (forks, stars/watches) | ||
D1: Issue/review engagement (first issue, first PR, first comment) | ||
D2: Significant contributions (merged PRs, many issues, multiple comments) | ||
""" | ||
|
||
from augur.tasks.contributors_engagement.worker import ( | ||
collect_contributor_engagement, | ||
collect_d0_engagement, | ||
collect_d1_engagement, | ||
collect_d2_engagement | ||
) | ||
|
||
# Public API of this package: the task entry points re-exported from the
# worker module imported above.
__all__ = [
    "collect_contributor_engagement",
    "collect_d0_engagement",
    "collect_d1_engagement",
    "collect_d2_engagement",
]
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@officialasishkumar: New database objects should be defined in a migration file under
{repo root}/augur/application/schema/alembic/versions
I think that, with the PR open for the other GSoC team, the next revision number in sequence is 35.
That enables Alembic upgrades and downgrades.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@sgoggins updated with commit 299bd90