# Clean Data
#
# Workflow file for the "Clean Data" GitHub Actions workflow (run #327).
name: Clean Data

# Triggers: one scheduled run per day from Monday to Saturday (there is no
# Sunday entry), timed to line up with final team lists, plus manual runs.
# Cron expressions are evaluated in UTC; each entry notes the local
# equivalent as AEST (UTC+10) / AEDT (UTC+11).
on:
  schedule:
    - cron: '0 9 * * 1'  # Monday:    7 PM AEST / 8 PM AEDT
    - cron: '0 9 * * 2'  # Tuesday:   7 PM AEST / 8 PM AEDT
    - cron: '0 9 * * 3'  # Wednesday: 7 PM AEST / 8 PM AEDT
    - cron: '0 9 * * 4'  # Thursday:  7 PM AEST / 8 PM AEDT
    - cron: '0 8 * * 5'  # Friday:    6 PM AEST / 7 PM AEDT
    - cron: '0 7 * * 6'  # Saturday:  5 PM AEST / 6 PM AEDT
  # Allows the workflow to be started manually as well.
  workflow_dispatch:
jobs:
  # Rebuilds the cleaned dataset by running the R cleaning script, then
  # commits and pushes the result when the working tree has changed.
  run_r_script:
    runs-on: ubuntu-latest
    # The job pushes a commit, so the workflow token needs write access to
    # repository contents even where the repository default is read-only.
    permissions:
      contents: write
    # Queue overlapping runs (scheduled + manual) instead of letting two jobs
    # race to push to the same branch.
    concurrency:
      group: clean-data-${{ github.ref }}
      cancel-in-progress: false
    steps:
      # Step 1: Checkout repository
      - name: Checkout repository
        uses: actions/checkout@v4

      # Step 2: Install system libraries needed to build/load the R packages
      - name: Install system dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y libcurl4-openssl-dev libssl-dev libxml2-dev

      # Step 3: Setup R
      # Runs before the cache step on purpose: setup-r exports R_LIBS_USER
      # (the library packages are installed into) and reports the installed
      # R version, both of which the cache step depends on.
      - name: Setup R
        id: setup_r
        uses: r-lib/actions/setup-r@v2

      # Step 4: Cache R packages
      # Cache the library R actually uses rather than a hard-coded
      # version-specific path, and include the R version in the key so a
      # library built for one R release is not restored into another.
      - name: Cache R packages
        uses: actions/cache@v4
        with:
          path: ${{ env.R_LIBS_USER }}
          key: ${{ runner.os }}-R-${{ steps.setup_r.outputs.installed-r-version }}-${{ hashFiles('R scripts/R Studio/AFL Data Cleaning/Data collection and cleaning.R') }}
          restore-keys: |
            ${{ runner.os }}-R-${{ steps.setup_r.outputs.installed-r-version }}-

      # Step 5: Install any R packages that the cache did not provide
      - name: Install R packages
        shell: Rscript {0}
        run: |
          packages <- c("fitzRoy", "lubridate", "tidyr", "dplyr", "zoo", "httr", "jsonlite", "glue")

          # Make sure the user library exists and is on the search path.
          lib_dir <- Sys.getenv("R_LIBS_USER")
          if (nzchar(lib_dir)) {
            dir.create(lib_dir, recursive = TRUE, showWarnings = FALSE)
            .libPaths(c(lib_dir, .libPaths()))
          }

          to_install <- setdiff(packages, rownames(installed.packages()))
          if (length(to_install) > 0) {
            install.packages(to_install)
          }

          # install.packages() only warns when a package fails to install,
          # so verify explicitly and fail the step with a clear message.
          still_missing <- setdiff(packages, rownames(installed.packages()))
          if (length(still_missing) > 0) {
            stop("Failed to install R packages: ", paste(still_missing, collapse = ", "))
          }

      # Step 6: Generate clean dataset
      - name: Run R script
        run: |
          Rscript "R scripts/R Studio/AFL Data Cleaning/Data collection and cleaning.R"

      # Step 7: Check for changes
      # Stages everything, then records whether the index differs from HEAD
      # as a step output consumed by the commit and push steps.
      - name: Check for changes
        id: check_changes
        run: |
          git add -A
          if git diff --cached --quiet; then
            echo "No changes detected."
            echo "changes=false" >> "$GITHUB_OUTPUT"
          else
            echo "Changes detected."
            echo "changes=true" >> "$GITHUB_OUTPUT"
          fi

      # Step 8: Commit changes (already staged by the previous step)
      - name: Commit changes
        if: steps.check_changes.outputs.changes == 'true'
        run: |
          git config --local user.email "actions@github.com"
          git config --local user.name "GitHub Actions"
          git commit -m "clean data"

      # Step 9: Push changes
      # Authentication comes from the credentials actions/checkout persists
      # in the local git config, so no token needs to be passed here.
      - name: Push changes
        if: steps.check_changes.outputs.changes == 'true'
        run: |
          git push