Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions augur/application/db/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
PullRequestReviewMessageRef,
CommitMessage,
RepoClone,
ContributorEngagement,
)

from augur.application.db.models.spdx import (
Expand Down
48 changes: 47 additions & 1 deletion augur/application/db/models/augur_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -3600,4 +3600,50 @@ class RepoClone(Base):
count_clones = Column(BigInteger)
clone_data_timestamp = Column(TIMESTAMP(precision=6))

repo = relationship("Repo")
repo = relationship("Repo")

class ContributorEngagement(Base):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@officialasishkumar : New database objects should be in a file in {repo root}/augur/application/schema/alembic/versions

I think with the PR open for the other GSOC team the next number in sequence is 35.

That enables Alembic upgrades and downgrades.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sgoggins updated with commit 299bd90

__tablename__ = "contributor_engagement"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ABrain7710 / @Ulincsys : Can you confirm that it's our practice not to modify the main script for table creation, but to have the versioning script also included so that new installs just get "all the upgrades"?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The way I've done it in the past for other projects is that both the main schema gets modified AND migrations get created. Then, if someone creates a new database, they get the latest schema (this has required a small bit of code, run when Augur detects a new DB and creates the tables, to stamp it with the current Alembic version). That database can then be upgraded as time goes on, but new DBs always start out on the latest version.

# The table is created in the augur_data schema, the same schema its two
# foreign keys (repo, contributors) point into.
__table_args__ = {"schema": "augur_data"}

# Surrogate primary key. The server-side default draws the next value from
# the augur_data.contributor_engagement_engagement_id_seq sequence, so that
# sequence must exist before rows are inserted.
engagement_id = Column(
BigInteger,
primary_key=True,
server_default=text(
"nextval('augur_data.contributor_engagement_engagement_id_seq'::regclass)"
),
)
# Both foreign keys are required; the column types are not given explicitly
# and are taken from the referenced columns.
repo_id = Column(ForeignKey("augur_data.repo.repo_id"), nullable=False)
cntrb_id = Column(ForeignKey("augur_data.contributors.cntrb_id"), nullable=False)
# username is the only mandatory descriptive field; the rest are nullable.
username = Column(String, nullable=False)
full_name = Column(String)
country = Column(String)
platform = Column(String)

# D0 Level - Basic Engagement
# Boolean flags default to false on the server when omitted from an insert.
d0_forked = Column(Boolean, server_default=text("false"))
d0_starred_or_watched = Column(Boolean, server_default=text("false"))
# NOTE(review): presumably the time of the D0 event (fork/star/watch) --
# confirm against the collection task that populates it.
d0_engagement_timestamp = Column(TIMESTAMP(precision=6))

# D1 Level - Issue/Review Engagement
# Nullable timestamps with no default; NULL until a value is written.
d1_first_issue_created_at = Column(TIMESTAMP(precision=6))
d1_first_pr_opened_at = Column(TIMESTAMP(precision=6))
d1_first_pr_commented_at = Column(TIMESTAMP(precision=6))

# D2 Level - Significant Contributions
# Flags default to false and the comment counter defaults to 0.
# NOTE(review): the thresholds behind "many" / "multiple" are not defined
# here -- see the code that writes these columns.
d2_has_merged_pr = Column(Boolean, server_default=text("false"))
d2_created_many_issues = Column(Boolean, server_default=text("false"))
d2_total_comments = Column(BigInteger, server_default=text("0"))
d2_has_pr_with_many_commits = Column(Boolean, server_default=text("false"))
d2_commented_on_multiple_prs = Column(Boolean, server_default=text("false"))

# Metadata
# Provenance columns; data_collection_date is filled in by the database
# with CURRENT_TIMESTAMP when the insert does not supply a value.
tool_source = Column(String)
tool_version = Column(String)
data_source = Column(String)
data_collection_date = Column(
TIMESTAMP(precision=6), server_default=text("CURRENT_TIMESTAMP")
)

# ORM relationships, resolved lazily from the mapped class names.
repo = relationship("Repo")
contributor = relationship("Contributor")
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
"""Add contributor_engagement table

Revision ID: 35
Revises: 33
Create Date: 2025-07-26 10:00:00.000000

"""

import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
# NOTE(review): this revision is numbered "35" but revises "33", skipping
# "34". Confirm how it should chain with whichever migration takes "34";
# if that one also revises "33", the history will end up with two heads.
revision = "35"
down_revision = "33"
branch_labels = None
depends_on = None


def _flag_column(name):
    """Return a nullable boolean column whose server default is ``false``."""
    return sa.Column(
        name,
        sa.Boolean(),
        server_default=sa.text("false"),
        nullable=True,
    )


def _timestamp_column(name, server_default=None):
    """Return a nullable microsecond-precision timestamp column.

    ``precision=6`` mirrors the ``TIMESTAMP(precision=6)`` declared on the
    ContributorEngagement ORM model so the migration and the model describe
    the same column type.
    """
    return sa.Column(
        name,
        postgresql.TIMESTAMP(precision=6),
        server_default=server_default,
        nullable=True,
    )


def upgrade():
    """Create the ``augur_data.contributor_engagement`` table.

    The table holds one engagement record per row, keyed by a surrogate
    ``engagement_id`` and linked to ``augur_data.repo`` and
    ``augur_data.contributors`` through named foreign-key constraints.
    """
    op.create_table(
        "contributor_engagement",
        # Integer primary key with autoincrement: SQLAlchemy renders this as
        # a serial column on PostgreSQL, which creates the backing sequence.
        sa.Column("engagement_id", sa.BigInteger(), autoincrement=True, nullable=False),
        sa.Column("repo_id", sa.BigInteger(), nullable=False),
        sa.Column("cntrb_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("username", sa.String(), nullable=False),
        sa.Column("full_name", sa.String(), nullable=True),
        sa.Column("country", sa.String(), nullable=True),
        sa.Column("platform", sa.String(), nullable=True),
        # D0 Level - Basic Engagement
        _flag_column("d0_forked"),
        _flag_column("d0_starred_or_watched"),
        _timestamp_column("d0_engagement_timestamp"),
        # D1 Level - Issue/Review Engagement
        _timestamp_column("d1_first_issue_created_at"),
        _timestamp_column("d1_first_pr_opened_at"),
        _timestamp_column("d1_first_pr_commented_at"),
        # D2 Level - Significant Contributions
        _flag_column("d2_has_merged_pr"),
        _flag_column("d2_created_many_issues"),
        sa.Column(
            "d2_total_comments",
            sa.BigInteger(),
            server_default=sa.text("0"),
            nullable=True,
        ),
        _flag_column("d2_has_pr_with_many_commits"),
        _flag_column("d2_commented_on_multiple_prs"),
        # Metadata
        sa.Column("tool_source", sa.String(), nullable=True),
        sa.Column("tool_version", sa.String(), nullable=True),
        sa.Column("data_source", sa.String(), nullable=True),
        _timestamp_column(
            "data_collection_date",
            server_default=sa.text("CURRENT_TIMESTAMP"),
        ),
        sa.ForeignKeyConstraint(
            ["repo_id"],
            ["augur_data.repo.repo_id"],
            name="fk_contributor_engagement_repo",
        ),
        sa.ForeignKeyConstraint(
            ["cntrb_id"],
            ["augur_data.contributors.cntrb_id"],
            name="fk_contributor_engagement_contributors",
        ),
        sa.PrimaryKeyConstraint("engagement_id"),
        schema="augur_data",
    )


def downgrade():
    """Drop the ``augur_data.contributor_engagement`` table, reversing upgrade()."""
    table_name = "contributor_engagement"
    op.drop_table(table_name, schema="augur_data")
24 changes: 24 additions & 0 deletions augur/tasks/contributors_engagement/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
"""
Contributor Engagement Tasks

This module contains tasks for collecting and processing contributor engagement data
at different levels (D0, D1, D2) as defined in the CHAOSS metrics.

D0: Basic engagement (forks, stars/watches)
D1: Issue/review engagement (first issue, first PR, first comment)
D2: Significant contributions (merged PRs, many issues, multiple comments)
"""

from augur.tasks.contributors_engagement.worker import (
collect_contributor_engagement,
collect_d0_engagement,
collect_d1_engagement,
collect_d2_engagement
)

# Public task entry points exposed by this package.
__all__ = [
    "collect_contributor_engagement",
    "collect_d0_engagement",
    "collect_d1_engagement",
    "collect_d2_engagement",
]
Loading
Loading