|
5 | 5 | import os
|
6 | 6 | import argparse
|
7 | 7 |
|
8 |
| -import pandas as pd |
9 |
| - |
10 | 8 | from dotenv import load_dotenv
|
11 | 9 | from data_collection.yt_data_api import YouTubeDataAPI
|
12 | 10 | from data_collection.sentiment import SentimentAnalysis
|
13 | 11 | from log import Logger
|
14 | 12 | from astro_db import AstroDB
|
15 | 13 |
|
| 14 | + |
def extract_video_id_from_url(url: str) -> str:
    """
    Return the YouTube video ID contained in ``url``.

    The ID is taken from the ``v`` query parameter, so additional
    parameters or a fragment following it (``&t=42s``, ``&list=...``,
    ``#t=10``) are not included in the result, and parameters whose name
    merely ends in ``v`` (e.g. ``rev=1``) are not mistaken for it.

    If the URL has no parsable ``v`` query parameter, the legacy behaviour
    is used: the text after the first ``'v='`` substring is returned,
    trimmed at the next ``'&'`` or ``'#'``.

    Raises:
        IndexError: if ``url`` does not contain ``'v='`` at all.
    """
    # Local import keeps this function self-contained without touching the
    # module-level import block.
    from urllib.parse import parse_qs, urlparse

    # Preferred path: let the URL parser isolate the real 'v' parameter.
    values = parse_qs(urlparse(url).query).get('v')
    if values:
        return values[0]

    # Fallback for inputs without a proper query string (e.g. 'v=<id>').
    # Indexing [1] raises IndexError when 'v=' is missing, as before.
    video_id = url.split('v=')[1]
    for separator in ('&', '#'):
        video_id = video_id.split(separator)[0]
    return video_id
| 24 | + |
24 | 25 |
|
def parse_args() -> argparse.Namespace:
    """
    Parse the command-line arguments of the script.

    Recognised arguments:
        youtube_url: required positional string, the URL of the video.
        -l / --log:  optional logging level; one of 'debug', 'info',
                     'warn' or 'error'. ``None`` when not supplied.

    Returns:
        The ``argparse.Namespace`` produced from ``sys.argv``.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("youtube_url", type=str, help="URL to youtube video")
    parser.add_argument("-l", "--log", type=str,
                        choices=['debug', 'info', 'warn', 'error'],
                        help='Set the logging level')

    return parser.parse_args()
38 | 40 |
|
39 | 41 |
|
def main():
    """
    Entry point: collect comments for one YouTube video, score their
    sentiment and store the result.

    Steps, in order:
      1. Parse the CLI arguments and extract the video ID from the URL.
      2. Load environment variables via ``load_dotenv()`` and read
         ``LOG_LEVEL``, ``API_KEY`` and ``DB_FILE``.
      3. Build the logger; a log level given on the CLI takes priority
         over ``LOG_LEVEL``.
      4. Fetch the comments dataframe for the video.
      5. When the dataframe is not empty, fill the ``PSentiment`` and
         ``NSentiment`` columns per comment and insert the dataframe into
         the database.
      6. Emit a debug-level preview of the collected data.
    """
    # parse arguments
    args = parse_args()
    video_id = extract_video_id_from_url(args.youtube_url)

    # load environment variables
    load_dotenv()

    # prioritize log level provided on CLI, fallback to env variable
    log_level = args.log if args.log else os.getenv("LOG_LEVEL")
    api_key = os.getenv("API_KEY")
    db_file = os.getenv("DB_FILE")

    # set up logging
    logger = Logger(log_level)
    log = logger.get_logger()

    # pull comments from specified youtube video
    youtube = YouTubeDataAPI(logger, api_key)
    comments_df = youtube.get_comments(video_id)

    # NOTE(review): the original indentation of this section could not be
    # recovered; sentiment scoring and the database insert are assumed to
    # run only when comments were returned - confirm against the upstream
    # file.
    if not comments_df.empty:
        # Create the sentiment columns up front so every row has both.
        comments_df['PSentiment'] = ''
        comments_df['NSentiment'] = ''

        sa = SentimentAnalysis(logger)

        for index, row in comments_df.iterrows():
            # get_sentiment() result is indexed: [0] -> PSentiment,
            # [1] -> NSentiment.
            sentiment = sa.get_sentiment(row['comment'])
            comments_df.loc[index, 'PSentiment'] = sentiment[0]
            comments_df.loc[index, 'NSentiment'] = sentiment[1]

        # Database logic
        db = AstroDB(logger, db_file)
        db.create_database()
        db.insert_comment_dataframe(video_id, comments_df)

    log.debug('Collected data preview: \n{}'.format(comments_df))
| 80 | + |
78 | 81 |
|
# Run the collection pipeline only when executed as a script, not on import.
if __name__ == "__main__":
    main()
0 commit comments