import requests
import os
import time
+ import random

from bs4 import BeautifulSoup as soup


# Fetch a single job page and scrape its fields, retrying on failures
def get_datas(job, city, job_link):
    job_datas = [job_link]
    try:
        for retry in range(5):
-             time.sleep(5)
+             time.sleep(random.randint(1, 3))
            page_req = requests.get(
                url=job_link,
                headers={'User-agent': f'{job}_{city} bot'}
            )
            # status_code is an int, so compare against 429, not the string "429"
            if page_req.status_code == 429:
                print(f"\033[1;36m\n⚠️ Too many requests - Retrying with other IP...\033[0m")
                change_ip(random.randint(1, 30))
-                 time.sleep(3)
+                 time.sleep(random.randint(1, 3))
                continue
            else:
                page_req.raise_for_status()
                # Parse HTML
                job_soup = soup(page_req.text, 'html.parser')
                contents = job_soup.findAll('div', {'class': 'topcard__content-left'})[0:]
                if len(contents) == 0:
-                     time.sleep(3)
+                     time.sleep(random.randint(1, 3))
                    continue
                else:
-                     print(f"\033[1;36m\n⚠️ Couldn't retrieve all datas for the job link: {job_link}\033[0m")
+                     # Couldn't retrieve all datas for the job
                    break

        if len(contents) != 0:
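
The main change in this hunk swaps fixed sleeps for short random delays before each request, so the scraper's timing looks less bot-like and backs off when it is rate limited. A minimal standalone sketch of that retry-with-jitter pattern; the function name, retry count, and User-agent string here are illustrative assumptions, not part of this commit:

    import random
    import time

    import requests

    def fetch_with_jitter(url, max_retries=5):
        """Retry a GET with a short random pause between attempts."""
        for _ in range(max_retries):
            time.sleep(random.randint(1, 3))   # jitter, as in the changed lines
            resp = requests.get(url, headers={'User-agent': 'example bot'})
            if resp.status_code == 429:        # rate limited: back off and retry
                continue
            resp.raise_for_status()
            return resp.text
        return None
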
@@ -48,7 +49,7 @@ def get_datas(job, city, job_link):

                # Scraping Job Title
                for title in content.findAll('h1', {'class': 'topcard__title'})[0:]:
-                     print(f'\033[0;32m📌 {title.text}\033[0m', f'\033[1;33m- {org}\033[0m')
+                     print(f'\n\033[0;32m📌 {title.text}\033[0m', f'\033[1;33m- {org}\033[0m')
                    job_datas.append(title.text.replace(',', '.'))

                for location in content.findAll('span', {'class': 'topcard__flavor topcard__flavor--bullet'})[0:]:
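
The loops in this hunk pull each field out of the parsed page by its CSS class. A small self-contained sketch of the same findAll-by-class pattern; the HTML snippet is invented for illustration and is not taken from a real job page:

    from bs4 import BeautifulSoup

    html = """
    <div class="topcard__content-left">
      <h1 class="topcard__title">Data Engineer</h1>
      <span class="topcard__flavor topcard__flavor--bullet">Lyon, France</span>
    </div>
    """
    job_soup = BeautifulSoup(html, 'html.parser')

    # Same findAll-by-class pattern as the loops above
    for title in job_soup.findAll('h1', {'class': 'topcard__title'}):
        print(title.text)      # Data Engineer
    for location in job_soup.findAll('span', {'class': 'topcard__flavor topcard__flavor--bullet'}):
        print(location.text)   # Lyon, France
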
@@ -80,9 +81,9 @@ def get_datas(job, city, job_link):
            for criteria in job_soup.findAll('span', {'class': 'job-criteria__text job-criteria__text--criteria'})[:4]:
                job_datas.append(criteria.text)
        else:
-             print(f"\033[1;36m⚠️ Saving (only) the job link on the CSV file.\033[0m")
+             print(f"\n\033[1;36m⚠️ Saving (only) the job link on the CSV file.\033[0m")

-         print(f"\033[0;34mExtracted Datas: {job_datas} \033[0m")
+         # print(f"\033[0;34mExtracted Datas: {job_datas} \033[0m")

        if len(job_datas) < 10:
            fill_number = 10 - len(job_datas)
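
The tail of the function pads job_datas out to a fixed width of 10 fields so every CSV row has the same number of columns. A sketch of how that padding plus the save step might look; the 'N/A' filler and the 'jobs.csv' file name are assumptions, since this commit only shows the fill_number arithmetic:

    import csv

    def pad_and_save(job_datas, width=10, path='jobs.csv'):
        # Pad short rows so every CSV line has the same number of columns
        if len(job_datas) < width:
            fill_number = width - len(job_datas)   # same arithmetic as the commit
            job_datas = job_datas + ['N/A'] * fill_number
        with open(path, 'a', newline='', encoding='utf-8') as f:
            csv.writer(f).writerow(job_datas)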