Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 12 additions & 9 deletions src/astro.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,14 @@ def parse_args(astro_theme):
parser = argparse.ArgumentParser(description=description,
formatter_class=ArgumentDefaultsRichHelpFormatter)

parser.add_argument("youtube_url", type=str, help="youtube video URL")
parser.add_argument("-l", "--log", type=str, choices=['debug', 'info', 'warn', 'error'],
parser.add_argument('youtube_url', type=str, help='youtube video URL')
parser.add_argument('-l', '--log', type=str, choices=['debug', 'info', 'warn', 'error'],
help='Set the logging level', default='info')
parser.add_argument("--api-key", type=str, help="YouTube Data API key")
parser.add_argument("--db-file", type=str, help="database filename", default='astro.db')
parser.add_argument('--api-key', type=str, help='YouTube Data API key')
parser.add_argument('--db-file', type=str, help='database filename', default='astro.db')
parser.add_argument('--log-file', type=str, help='log output to specified file', default='astro_log.txt')
parser.add_argument('-j', '--log-json', type=bool, help='log json API responses',
default=False, action=argparse.BooleanOptionalAction)
args = parser.parse_args()

return args
Expand All @@ -63,19 +66,19 @@ def main():
log_level = args.log if args.log else os.getenv("LOG_LEVEL")
api_key = args.api_key if args.api_key else os.getenv("API_KEY")
db_file = args.db_file if args.db_file else os.getenv("DB_FILE")
log_file = args.log_file if args.log_file else os.getenv("LOG_FILE")
log_json = args.log_json if args.log_json else os.getenv("LOG_JSON")

# set up logging
logging.setLoggerClass(AstroLogger)
logger = logging.getLogger(__name__)
logger.astro_config(log_level, astro_theme)

logger.info('Collecting video data...')
logger.astro_config(log_level, astro_theme, log_file=log_file)

# collect metadata for provided video
youtube = YouTubeDataAPI(logger, api_key)
youtube = YouTubeDataAPI(logger, api_key, log_json)
video_data = youtube.get_video_metadata(video_id)

logger.print_object(video_data, title="Video data")
logger.print_video_data(video_data)

# check local database for existing data on provided video
db = AstroDB(logger, db_file)
Expand Down
13 changes: 12 additions & 1 deletion src/data_collection/yt_data_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pandas as pd
import traceback
import string
import json

from src.data_collection.data_structures import VideoData
from googleapiclient.discovery import build
Expand All @@ -13,10 +14,12 @@ class YouTubeDataAPI:
logger = None
api_key = None
youtube = None
log_json = False

def __init__(self, logger, api_key, log_json=False):
    """
    Store the collaborators and build the YouTube Data API v3 client.

    logger: logger object kept on ``self.logger``.
    api_key: developer key handed to the API client builder.
    log_json: flag kept on ``self.log_json``; the request methods check it
        before dumping raw API responses to the log (default False).
    """
    self.logger, self.api_key, self.log_json = logger, api_key, log_json

    # the client is built once and reused for every request
    self.youtube = build('youtube', 'v3', developerKey=api_key)

@staticmethod
Expand Down Expand Up @@ -119,6 +122,10 @@ def get_comments(self, video_data) -> pd.DataFrame:

try:
response = request.execute()
if self.log_json:
with self.logger.log_file_only():
self.logger.info(json.dumps(response, indent=4))

comment_dataframe, comments_added = self.parse_comment_api_response(response, comment_dataframe)
if 'nextPageToken' in response: # there are more comments to fetch
page_token = response['nextPageToken']
Expand Down Expand Up @@ -155,6 +162,10 @@ def get_video_metadata(self, video_id: str) -> VideoData:

try:
response = request.execute()
if self.log_json:
with self.logger.log_file_only():
self.logger.info(json.dumps(response, indent=4))

video_data = response['items'][0]['snippet']
video_stats = response['items'][0]['statistics']

Expand Down
60 changes: 38 additions & 22 deletions src/log.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from rich.console import Console
from rich.table import Table
from rich.theme import Theme
from rich import print as rprint
from contextlib import contextmanager

from src.progress import AstroProgress

Expand All @@ -19,26 +19,42 @@ class AstroLogger(logging.Logger):
astro_text_color: str
astro_theme: Theme
progress: AstroProgress
log_file: str
console_handler: RichHandler
file_handler: logging.FileHandler

def astro_config(self, log_level_str: str, astro_theme):
def astro_config(self, log_level_str: str, astro_theme, log_file='astro_log.txt'):
"""
Custom logging config.
"""
# set log level
self.log_level_str = log_level_str
self.log_level = self.get_log_level(log_level_str)

self.setLevel(self.log_level)

# set color theme
self.astro_theme = astro_theme

# create console using the astro theme
self.console = self.astro_theme.get_console()

# create log handlers
self.log_file = log_file
self.console_handler = RichHandler(rich_tracebacks=True, console=self.console)
self.file_handler = logging.FileHandler(self.log_file)
log_handlers = [self.console_handler, self.file_handler]

# configure formatting for file handler
file_formatter = logging.Formatter(
'%(asctime)s:%(levelname)6s: %(filename)14s:%(lineno)-3d %(message)-60s',
'%Y-%m-%d %H:%M:%S')

self.file_handler.setFormatter(file_formatter)

# configure logging
logging.basicConfig(format='%(message)s',
level=self.log_level,
handlers=[RichHandler(rich_tracebacks=True,
console=self.console)])
handlers=log_handlers)

# suppress google logs
self.__suppress_logs('google', logging.WARNING)
Expand Down Expand Up @@ -89,25 +105,18 @@ def __rich_table(self, title=''):

return table

def print_video_data(self, video_data):
    """
    Log a VideoData object, one attribute per line.

    Each attribute is emitted through ``self.info`` as ``name: value`` with
    the name right-aligned in a 20-character column. Attributes listed in
    ``exclude_fields`` are left out. Nothing is logged when ``video_data``
    is None.
    """
    if video_data is None:
        return

    exclude_fields = ['filtered_comment_count']

    for attr, value in vars(video_data).items():
        if attr in exclude_fields:
            continue
        self.info(f'{attr:>20}: {str(value)}')

def print_dataframe(self, df, title=''):
"""
Expand Down Expand Up @@ -145,8 +154,15 @@ def print_dataframe(self, df, title=''):

self.console.print(table)

@contextmanager
def log_file_only(self):
    """
    Provides a context in which logging will only go to the log file.

    For the duration of the ``with`` body the console handler's level is
    raised to CRITICAL, so only CRITICAL records still reach the console.
    The level the handler had on entry is put back on exit, including when
    the body raises, so a failure inside the context cannot leave the
    console muted. Restoring the entry level (rather than a fixed one) also
    keeps nested uses muted until the outermost context exits.
    """
    console = self.console_handler
    previous_level = console.level

    # temporarily raise log level of console handler
    console.setLevel(logging.CRITICAL)
    try:
        yield
    finally:
        # restore the level found on entry, even if the body raised
        console.setLevel(previous_level)
7 changes: 3 additions & 4 deletions src/tests/test_log.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,9 @@ def test_get_log_level(self, logger, level):

assert str(exception.value) == "Invalid logger level specified: {}".format(level)

@pytest.mark.parametrize('video_data', test_video_data)
def test_print_video_data(self, logger, video_data):
    """
    Smoke test: print_video_data runs without raising for each sample
    in test_video_data.
    """
    logger.print_video_data(video_data)

@pytest.mark.parametrize('title', ['title1', 'title-2', 'title_3'])
def test_print_dataframe(self, logger, comment_dataframe, title):
Expand Down
Loading