# GitHub Actions workflow: Web Crawler
# (Recovered from the "Workflow file for this run" view of run #169;
# browser page chrome removed and YAML indentation restored.)
name: Web Crawler

# NOTE: generic YAML 1.1 parsers read the `on` key as boolean `true`;
# GitHub's workflow loader handles it — suppress yamllint `truthy` here.
on:
  push:
    branches:
      - main
  schedule:
    # Hourly, at minute 0 (UTC).
    - cron: '0 * * * *'
  workflow_dispatch:

jobs:
  crawl_and_commit:
    runs-on: ubuntu-latest
    permissions:
      # Needed so GITHUB_TOKEN can push the crawled-data commit.
      contents: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      - name: Run web crawling script and check for changes
        # `id` lets the commit step inspect this step's real outcome below.
        id: crawl
        run: python crawl.py
        # continue-on-error keeps the job running past a crawl failure, but it
        # also forces this step's *conclusion* to "success" — so any plain
        # `if: success()` later in the job would always be true.
        continue-on-error: true

      - name: Commit crawled files
        # BUG FIX: was `if: success()`, which is always true after a
        # continue-on-error step; gate on the crawl step's actual outcome
        # so a failed crawl never gets committed.
        if: steps.crawl.outcome == 'success'
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          git add crawled_output
          if ! git diff-index --quiet HEAD --; then
            git commit -m "chore: Update crawled data"
            git push
          else
            echo "No changes to commit."
          fi