# Source: GitHub Actions workflow file captured from the web UI
# (run page for PR #7, "Update find_urls.py").

---
# Web Crawler workflow: runs crawl.py on every push to main and on an
# hourly schedule, then commits any changed crawler output back to the
# repository.
name: Web Crawler

on:
  push:
    branches:
      - main
  schedule:
    # Every hour, on the hour (UTC).
    - cron: '0 * * * *'

jobs:
  crawl_and_commit:
    runs-on: ubuntu-latest
    permissions:
      contents: write  # Required so the job can push the commit created below.
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      - name: Run web crawling script and check for changes
        id: crawl_step
        run: |
          python crawl.py
        # Allows the workflow to continue even if the script returns a
        # non-zero exit code (e.g. partial crawl results). NOTE(review):
        # this also masks genuine crawler failures — confirm that is intended.
        continue-on-error: true

      - name: Commit crawled files if there are changes
        # With continue-on-error on the previous step, success() is always
        # true here; kept so behavior is correct if continue-on-error is
        # ever removed from the crawl step.
        if: success()
        run: |
          git config --global user.name "github-actions[bot]"
          git config --global user.email "github-actions[bot]@users.noreply.github.com"
          # Use the directory path (not a shell glob) so an empty or missing
          # output set does not fail the step.
          git add crawled_output/
          # Commit AND push only when the index actually changed. The
          # original omitted `git push`, so commits never left the runner.
          if ! git diff --cached --quiet; then
            git commit -m "chore: Update crawled data"
            git push
          fi