Skip to content

Adding a CloudWatch scheduler to trigger the Cricsheet data-downloading Lambda #75

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 19 additions & 5 deletions aws/mens_t20i_dataset_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def __init__(
cricsheet_data_downloading_bucket_name,
removal_policy=RemovalPolicy.DESTROY,
event_bridge_enabled=True,
# block_public_access=s3.BlockPublicAccess.BLOCK_ALL,
block_public_access=s3.BlockPublicAccess.BLOCK_ALL,
)

######################################## DYNAMODB CONFIGURATIONS ################################################
Expand Down Expand Up @@ -101,7 +101,21 @@ def __init__(
resources=["*"],
)
)

# EventBridge rule that triggers the Cricsheet data-downloading Lambda every Monday at 00:00 UTC (midnight)
event_bridge_rule_to_trigger_cricsheet_data_downloading_lambda = events.Rule(
self,
"event_bridge_rule_to_trigger_cricsheet_data_downloading_lambda",
schedule=events.Schedule.cron(
minute="0",
hour="0",
month="*",
week_day="MON",
year="*",
),
targets=[
events_targets.LambdaFunction(cricsheet_data_downloading_lambda)
],
)
# Lambda function for extracting deliverywise cricsheet data
cricsheet_deliverywise_data_extraction_lambda = _lambda.Function(
self,
Expand All @@ -120,7 +134,7 @@ def __init__(
pandas_layer,
],
memory_size=300,
timeout=Duration.minutes(10),
timeout=Duration.minutes(1),
)
# Permissions for lambda functions to the S3 bucket
cricsheet_data_downloading_bucket.grant_read_write(cricsheet_deliverywise_data_extraction_lambda)
Expand Down Expand Up @@ -181,7 +195,7 @@ def __init__(
pandas_layer,
],
memory_size=300,
timeout=Duration.minutes(10),
timeout=Duration.minutes(1),
)
# Permissions for lambda functions to the S3 bucket
cricsheet_data_downloading_bucket.grant_read_write(cricsheet_matchwise_data_extraction_lambda)
Expand Down Expand Up @@ -240,7 +254,7 @@ def __init__(
package_layer,
pandas_layer,
],
memory_size=1024,
memory_size=3000,
timeout=Duration.minutes(10),
)
# Permissions for lambda functions to the S3 bucket
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
MATCHWISE_DATA_CSV_FILE_NAME
)
from mens_t20i_data_collector._lambdas.utils import (
exception_handler,
get_environmental_variable_value
)

Expand Down Expand Up @@ -39,8 +40,8 @@ def matchwise_data(self):
matchwise_dataframe: pd.DataFrame = pd.DataFrame(matchwise_data_cursor)
matchwise_dataframe.drop(columns=["_id"], inplace=True)
matchwise_dataframe.rename(columns={"index": "match_number"}, inplace=True)
# matchwise_dataframe["margin_runs"].astype(int)
matchwise_dataframe.sort_values(by=["match_number"], inplace=True)
matchwise_dataframe.sort_values(by=["date", "match_id"], inplace=True)
matchwise_dataframe["match_number"] = range(1, len(matchwise_dataframe) + 1)
return matchwise_dataframe

@property
Expand Down Expand Up @@ -73,18 +74,12 @@ def _convert_dataframe_to_csv_and_upload_to_s3(self, dataframe: pd.DataFrame, fi
logger.error(f"Failed to upload '{filename}' to S3: {str(e)}", exc_info=True)
raise


@exception_handler # noqa: Vulture
def handler(_, __): # noqa: Vulture
logger.info("Lambda function invoked.")
try:
dataset_preparation_handler = DatasetPreparationHandler()
dataset_preparation_handler.prepare_dataset()
return {
'statusCode': 200,
'body': 'Dataset successfully processed and uploaded.'
}
except Exception as e: # pylint: disable=broad-exception-caught
logger.error(f"Lambda function error: {str(e)}", exc_info=True)
return {
'statusCode': 500,
'body': f"Error processing dataset: {str(e)}"
}
"""
Lambda function handler to convert MongoDB data to CSV and upload to S3.
"""
dataset_preparation_handler = DatasetPreparationHandler()
dataset_preparation_handler.prepare_dataset()
return "Datasets prepared and uploaded successfully."