From 0254a176e1d334b7f65a691fbbca823d532b5928 Mon Sep 17 00:00:00 2001 From: Jagruti Date: Sun, 6 Mar 2022 22:04:49 +0530 Subject: [PATCH 1/5] Script to read trending git repos --- Python/Git_Trending_Repositories/Readme.md | 0 .../git-trending-repository-scraper.py | 37 ++++ .../github_trending_today.csv | 176 ++++++++++++++++++ 3 files changed, 213 insertions(+) create mode 100644 Python/Git_Trending_Repositories/Readme.md create mode 100644 Python/Git_Trending_Repositories/git-trending-repository-scraper.py create mode 100644 Python/Git_Trending_Repositories/github_trending_today.csv diff --git a/Python/Git_Trending_Repositories/Readme.md b/Python/Git_Trending_Repositories/Readme.md new file mode 100644 index 000000000..e69de29bb diff --git a/Python/Git_Trending_Repositories/git-trending-repository-scraper.py b/Python/Git_Trending_Repositories/git-trending-repository-scraper.py new file mode 100644 index 000000000..b5986c898 --- /dev/null +++ b/Python/Git_Trending_Repositories/git-trending-repository-scraper.py @@ -0,0 +1,37 @@ +import requests +from bs4 import BeautifulSoup +import csv + +page = requests.get('https://github.com/trending') + +soup = BeautifulSoup(page.text, 'html.parser') + +# get the repo list +repo = soup.find(class_="position-relative container-lg p-responsive pt-6") + +repo_list = repo.find_all(class_='Box-row') + +print(len(repo_list)) + +# Open writer with name +file_name = "github_trending_today.csv" +# set newline to be '' so that that new rows are appended without skipping any +f = csv.writer(open(file_name, 'w', newline='')) + +f.writerow(['Developer', 'Repo Name', 'Programming Language', 'Total Stars', 'Number of Forks']) + +for repo in repo_list: + h1 = repo.find(class_='h3 lh-condensed') + developer_name = h1.find('span').text.strip().strip(" /") + repo_name = h1.find('span').next_sibling + stars = repo.find(class_='octicon octicon-star').next_sibling.text + programming_language = repo.find('span', itemprop='programmingLanguage') + 
+ if programming_language: + programming_language=programming_language.text.strip() + else: + programming_language='' + + no_of_forks = repo.find(class_='octicon octicon-repo-forked').next_sibling.text + + f.writerow([developer_name, repo_name, programming_language, stars, no_of_forks]) \ No newline at end of file diff --git a/Python/Git_Trending_Repositories/github_trending_today.csv b/Python/Git_Trending_Repositories/github_trending_today.csv new file mode 100644 index 000000000..ef70c5cf0 --- /dev/null +++ b/Python/Git_Trending_Repositories/github_trending_today.csv @@ -0,0 +1,176 @@ +Developer,Repo Name,Programming Language,Total Stars,Number of Forks +Arriven," + db1000n +",Go," + 410 +"," + 79 +" +nkallen," + plasticity +",TypeScript," + 442 +"," + 40 +" +public-apis," + public-apis +",Python," + 183,535 +"," + 21,178 +" +codez0mb1e," + resistance +",," + 228 +"," + 14 +" +xerpi," + vita2hos +",C," + 460 +"," + 15 +" +EbookFoundation," + free-programming-books +",," + 224,333 +"," + 47,576 +" +samber," + lo +",Go," + 1,086 +"," + 24 +" +TheAlgorithms," + Python +",Python," + 130,923 +"," + 34,536 +" +sunface," + rust-by-practice +",Rust," + 692 +"," + 31 +" +avelino," + awesome-go +",Go," + 76,375 +"," + 9,565 +" +Avik-Jain," + 100-Days-Of-ML-Code +",," + 34,964 +"," + 8,817 +" +italiaremote," + awesome-italia-remote +",Go," + 637 +"," + 107 +" +tauri-apps," + tauri +",Rust," + 34,061 +"," + 835 +" +lapce," + lapce +",Rust," + 5,511 +"," + 125 +" +microsoft," + Web-Dev-For-Beginners +",JavaScript," + 42,188 +"," + 6,164 +" +alexmon1989," + russia_ddos +",Python," + 29 +"," + 13 +" +wcandillon," + remotion-fireship +",TypeScript," + 118 +"," + 9 +" +jwasham," + coding-interview-university +",," + 211,782 +"," + 57,419 +" +iamadamdev," + bypass-paywalls-chrome +",JavaScript," + 23,350 +"," + 1,682 +" +topjohnwu," + Magisk +",C++," + 24,203 +"," + 5,865 +" +TheAlgorithms," + Javascript +",JavaScript," + 18,157 +"," + 3,148 +" +TencentARC," + GFPGAN +",Python," + 
17,592 +"," + 2,672 +" +CaiJimmy," + hugo-theme-stack +",HTML," + 1,592 +"," + 382 +" +qinguoyi," + TinyWebServer +",C++," + 5,573 +"," + 1,678 +" +discordjs," + discord.js +",JavaScript," + 17,707 +"," + 3,262 +" From a63a13c2b4c47a240f0faa4fe9d48a4a3d0604d9 Mon Sep 17 00:00:00 2001 From: Jagruti Date: Sun, 6 Mar 2022 22:06:13 +0530 Subject: [PATCH 2/5] Script to read trending git repos --- .../github_trending_today.csv | 176 ------------------ 1 file changed, 176 deletions(-) delete mode 100644 Python/Git_Trending_Repositories/github_trending_today.csv diff --git a/Python/Git_Trending_Repositories/github_trending_today.csv b/Python/Git_Trending_Repositories/github_trending_today.csv deleted file mode 100644 index ef70c5cf0..000000000 --- a/Python/Git_Trending_Repositories/github_trending_today.csv +++ /dev/null @@ -1,176 +0,0 @@ -Developer,Repo Name,Programming Language,Total Stars,Number of Forks -Arriven," - db1000n -",Go," - 410 -"," - 79 -" -nkallen," - plasticity -",TypeScript," - 442 -"," - 40 -" -public-apis," - public-apis -",Python," - 183,535 -"," - 21,178 -" -codez0mb1e," - resistance -",," - 228 -"," - 14 -" -xerpi," - vita2hos -",C," - 460 -"," - 15 -" -EbookFoundation," - free-programming-books -",," - 224,333 -"," - 47,576 -" -samber," - lo -",Go," - 1,086 -"," - 24 -" -TheAlgorithms," - Python -",Python," - 130,923 -"," - 34,536 -" -sunface," - rust-by-practice -",Rust," - 692 -"," - 31 -" -avelino," - awesome-go -",Go," - 76,375 -"," - 9,565 -" -Avik-Jain," - 100-Days-Of-ML-Code -",," - 34,964 -"," - 8,817 -" -italiaremote," - awesome-italia-remote -",Go," - 637 -"," - 107 -" -tauri-apps," - tauri -",Rust," - 34,061 -"," - 835 -" -lapce," - lapce -",Rust," - 5,511 -"," - 125 -" -microsoft," - Web-Dev-For-Beginners -",JavaScript," - 42,188 -"," - 6,164 -" -alexmon1989," - russia_ddos -",Python," - 29 -"," - 13 -" -wcandillon," - remotion-fireship -",TypeScript," - 118 -"," - 9 -" -jwasham," - coding-interview-university -",," - 211,782 -"," - 
57,419 -" -iamadamdev," - bypass-paywalls-chrome -",JavaScript," - 23,350 -"," - 1,682 -" -topjohnwu," - Magisk -",C++," - 24,203 -"," - 5,865 -" -TheAlgorithms," - Javascript -",JavaScript," - 18,157 -"," - 3,148 -" -TencentARC," - GFPGAN -",Python," - 17,592 -"," - 2,672 -" -CaiJimmy," - hugo-theme-stack -",HTML," - 1,592 -"," - 382 -" -qinguoyi," - TinyWebServer -",C++," - 5,573 -"," - 1,678 -" -discordjs," - discord.js -",JavaScript," - 17,707 -"," - 3,262 -" From 14d6562c874e16701e86bff374f706a24cc72a2e Mon Sep 17 00:00:00 2001 From: Jagruti Date: Sun, 6 Mar 2022 22:18:13 +0530 Subject: [PATCH 3/5] Updated readme file and added requirement.txt --- Python/Git_Trending_Repositories/Readme.md | 16 ++++++++++++++++ .../Git_Trending_Repositories/requirements.txt | 8 ++++++++ 2 files changed, 24 insertions(+) create mode 100644 Python/Git_Trending_Repositories/requirements.txt diff --git a/Python/Git_Trending_Repositories/Readme.md b/Python/Git_Trending_Repositories/Readme.md index e69de29bb..f9a832d7e 100644 --- a/Python/Git_Trending_Repositories/Readme.md +++ b/Python/Git_Trending_Repositories/Readme.md @@ -0,0 +1,16 @@ +### Scrape Git Trending repositories + +This script scrapes details about Git trending repositories displayed here https://github.com/trending. + +The data will be exported in a .csv file. + +### Setup + +1. Create a Virtual Environment. +2. Install the requirements by using `pip3 install -r requirements.txt` +3. Hurray! You're ready to use the script. 
+ +### Running a file + +`python git-trending-repository-scraper.py` + diff --git a/Python/Git_Trending_Repositories/requirements.txt b/Python/Git_Trending_Repositories/requirements.txt new file mode 100644 index 000000000..165c7e829 --- /dev/null +++ b/Python/Git_Trending_Repositories/requirements.txt @@ -0,0 +1,8 @@ +beautifulsoup4==4.10.0 +bs4==0.0.1 +certifi==2021.10.8 +charset-normalizer==2.0.12 +idna==3.3 +requests==2.27.1 +soupsieve==2.3.1 +urllib3==1.26.8 From b7cd29d9cde22c1831827eac3155ef423cda2e9a Mon Sep 17 00:00:00 2001 From: Jagruti Date: Mon, 7 Mar 2022 09:34:10 +0530 Subject: [PATCH 4/5] Updating readme file with cron job instructions --- Python/Git_Trending_Repositories/Readme.md | 17 +++++++++++++++++ .../git-trending-repository-scraper.py | 2 -- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/Python/Git_Trending_Repositories/Readme.md b/Python/Git_Trending_Repositories/Readme.md index f9a832d7e..dec9a32d9 100644 --- a/Python/Git_Trending_Repositories/Readme.md +++ b/Python/Git_Trending_Repositories/Readme.md @@ -14,3 +14,20 @@ The data will be exported in a .csv file. `python git-trending-repository-scraper.py` +### Running this script as a cron job + +The syntax is: + +`mm hh * * * <command>` + +This will execute the cron job every day at a particular hour. + +`0 12 * * * /usr/bin/python3 /home/user/Rotten-Scripts/Python/Git_Trending_Repositories/git-trending-repository-scraper.py` + +Adding the above command in the `crontab` will run the script at 12:00 pm (noon) every day. + +The `.csv` file will be generated in the cron job's working directory (usually the user's home directory), because the script writes to a relative path. + +The only downside of the cron job is, to install all the requirements globally. 
+ + diff --git a/Python/Git_Trending_Repositories/git-trending-repository-scraper.py b/Python/Git_Trending_Repositories/git-trending-repository-scraper.py index b5986c898..df73c451e 100644 --- a/Python/Git_Trending_Repositories/git-trending-repository-scraper.py +++ b/Python/Git_Trending_Repositories/git-trending-repository-scraper.py @@ -11,8 +11,6 @@ repo_list = repo.find_all(class_='Box-row') -print(len(repo_list)) - # Open writer with name file_name = "github_trending_today.csv" # set newline to be '' so that that new rows are appended without skipping any From c5b7f8102c440e20586a058c986de0fa1d68644f Mon Sep 17 00:00:00 2001 From: Jagruti Date: Wed, 9 Mar 2022 14:56:49 +0530 Subject: [PATCH 5/5] updating read.me file --- Python/Git_Trending_Repositories/Readme.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Python/Git_Trending_Repositories/Readme.md b/Python/Git_Trending_Repositories/Readme.md index dec9a32d9..3aee619d5 100644 --- a/Python/Git_Trending_Repositories/Readme.md +++ b/Python/Git_Trending_Repositories/Readme.md @@ -31,3 +31,10 @@ Adding the above command in the `crontab` will run the script at 12:00 am every The only downside of the cron job is, to install all the requirements globally. +## Output + +![Output Pic](https://i.imgur.com/oJyFuwP.png) + +## Author(s) + +Hi, I'm [Jagruti Tiwari](https://github.com/Jagrutiti/), the author of this script. \ No newline at end of file