# Web Crawler — GitHub Actions workflow (as seen in run "Web Crawler #15").
name: Web Crawler

on:
  push:
    branches:
      - main
  schedule:
    # Run at the top of every hour (UTC).
    - cron: '0 * * * *'
  # Allow manual runs from the Actions tab.
  workflow_dispatch:

jobs:
  crawl_and_commit:
    runs-on: ubuntu-latest
    # Required so the job's GITHUB_TOKEN can push the commit below.
    permissions:
      contents: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      - name: Run web crawling script and check for changes
        run: python crawl.py

      - name: Commit crawled files
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          git add crawled_output
          # Check if there are changes to commit before committing
          # (diff-index against HEAD sees the files staged above).
          if ! git diff-index --quiet HEAD --; then
            git commit -m "chore: Add crawled data"
            git push
          else
            echo "No changes to commit."
          fi