Skip to content

Feature/clean up csvs #2

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 15 additions & 7 deletions __init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from bs4 import BeautifulSoup
import requests
import csv
from datetime import datetime
import os

def scrape_carrier(carrier_id):
""""scrape the page of one carrier"""
Expand All @@ -22,7 +24,7 @@ def scrape_carrier(carrier_id):
vehicle_type_rows = [item.find_parent('tr') for item in vehicle_type_soups]
vehicle_type_table = [[item.text for item in row.find_all()] for row in vehicle_type_rows]

return (included_cargo, vehicle_type_table)
return (included_cargo, vehicle_type_table, carrier_id)

def parse_carrier_ids(fp):
"""parse carriers ids to put into scraper"""
Expand All @@ -34,20 +36,26 @@ def parse_carrier_ids(fp):
ids = [row[CARRIER_ID_COLUMN_INDEX] for row in reader]
return ids

def write_carrier_results(results, directory):
    """Append one carrier's scraped data to the joinable CSVs in *directory*.

    Args:
        results: ``(cargos, vehicles, carrier_id)`` tuple in the shape returned
            by ``scrape_carrier``. ``cargos`` is an iterable of cargo values;
            ``vehicles`` is an iterable of rows, each an iterable of cell
            values.
        directory: Existing directory holding ``carrier.csv`` and
            ``carrier_vehicle.csv``. Both files are opened in append mode, so
            they are created on first use and extended afterwards.

    Every written row starts with ``carrier_id`` so the two files can be
    joined on that column.
    """
    cargos, vehicles, carrier_id = results

    # newline='' hands line-ending control to the csv module, as its docs
    # require; the explicit encoding keeps scraped non-ASCII text portable.
    carrier_path = os.path.join(directory, 'carrier.csv')
    with open(carrier_path, 'a', newline='', encoding='utf-8') as carrier_file:
        # One writer per file instead of one per row.
        csv.writer(carrier_file).writerows(
            [carrier_id, cargo] for cargo in cargos
        )

    vehicle_path = os.path.join(directory, 'carrier_vehicle.csv')
    with open(vehicle_path, 'a', newline='', encoding='utf-8') as carrier_vehicle_file:
        csv.writer(carrier_vehicle_file).writerows(
            [carrier_id, *vehicle_type] for vehicle_type in vehicles
        )

def main(fp):
    """Scrape every carrier listed in *fp* into a fresh per-run directory.

    Args:
        fp: Path of the file passed to ``parse_carrier_ids`` to obtain the
            carrier ids.

    Each run writes its CSVs under ``data/<start time in ISO format>`` so
    results from separate runs are kept apart.
    """
    ids = parse_carrier_ids(fp)
    # NOTE(review): the ISO timestamp contains ':' characters, which are not
    # valid in Windows path names — confirm the target platforms.
    runtime = datetime.now().isoformat()
    directory = os.path.join('data', runtime)
    # makedirs also creates the 'data' parent; os.mkdir would raise
    # FileNotFoundError on a fresh checkout where it does not exist yet.
    os.makedirs(directory, exist_ok=True)
    for carrier_id in ids:
        results = scrape_carrier(carrier_id)
        write_carrier_results(results, directory)


# Script entry point: scrape the carriers listed in the bundled census file.
if __name__ == "__main__":
    main('FMCSA_CENSUS1_2020Aug/FMCSA_CENSUS1_2020Aug.txt')