From a179106de459b5f89ce6f93d2465f1472dfa2e8a Mon Sep 17 00:00:00 2001 From: NishanthMuruganantham Date: Sun, 18 May 2025 22:01:07 +0530 Subject: [PATCH] scheduler has been added --- aws/mens_t20i_dataset_stack.py | 24 +++++++++++++---- .../convert_mongo_db_data_to_csv_lambda.py | 27 ++++++++----------- 2 files changed, 30 insertions(+), 21 deletions(-) diff --git a/aws/mens_t20i_dataset_stack.py b/aws/mens_t20i_dataset_stack.py index 6f1a24d..dd35d8d 100644 --- a/aws/mens_t20i_dataset_stack.py +++ b/aws/mens_t20i_dataset_stack.py @@ -34,7 +34,7 @@ def __init__( cricsheet_data_downloading_bucket_name, removal_policy=RemovalPolicy.DESTROY, event_bridge_enabled=True, - # block_public_access=s3.BlockPublicAccess.BLOCK_ALL, + block_public_access=s3.BlockPublicAccess.BLOCK_ALL, ) ######################################## DYNAMODB CONFIGURATIONS ################################################ @@ -101,7 +101,21 @@ def __init__( resources=["*"], ) ) - + # EventBridge Rule to trigger the Lambda every Monday at 12:00 AM UTC + event_bridge_rule_to_trigger_cricsheet_data_downloading_lambda = events.Rule( + self, + "event_bridge_rule_to_trigger_cricsheet_data_downloading_lambda", + schedule=events.Schedule.cron( + minute="0", + hour="0", + month="*", + week_day="MON", + year="*", + ), + targets=[ + events_targets.LambdaFunction(cricsheet_data_downloading_lambda) + ], + ) # Lambda function for extracting deliverywise cricsheet data cricsheet_deliverywise_data_extraction_lambda = _lambda.Function( self, @@ -120,7 +134,7 @@ def __init__( pandas_layer, ], memory_size=300, - timeout=Duration.minutes(10), + timeout=Duration.minutes(1), ) # Permissions for lambda functions to the S3 bucket cricsheet_data_downloading_bucket.grant_read_write(cricsheet_deliverywise_data_extraction_lambda) @@ -181,7 +195,7 @@ def __init__( pandas_layer, ], memory_size=300, - timeout=Duration.minutes(10), + timeout=Duration.minutes(1), ) # Permissions for lambda functions to the S3 bucket cricsheet_data_downloading_bucket.grant_read_write(cricsheet_matchwise_data_extraction_lambda) @@ -240,7 +254,7 @@ def __init__( package_layer, pandas_layer, ], - memory_size=1024, + memory_size=3000, timeout=Duration.minutes(10), ) # Permissions for lambda functions to the S3 bucket diff --git a/src/mens_t20i_data_collector/_lambdas/convert_mongodb_data_to_csv/convert_mongo_db_data_to_csv_lambda.py b/src/mens_t20i_data_collector/_lambdas/convert_mongodb_data_to_csv/convert_mongo_db_data_to_csv_lambda.py index 0f9103c..7352041 100644 --- a/src/mens_t20i_data_collector/_lambdas/convert_mongodb_data_to_csv/convert_mongo_db_data_to_csv_lambda.py +++ b/src/mens_t20i_data_collector/_lambdas/convert_mongodb_data_to_csv/convert_mongo_db_data_to_csv_lambda.py @@ -9,6 +9,7 @@ MATCHWISE_DATA_CSV_FILE_NAME ) from mens_t20i_data_collector._lambdas.utils import ( + exception_handler, get_environmental_variable_value ) @@ -39,8 +40,8 @@ def matchwise_data(self): matchwise_dataframe: pd.DataFrame = pd.DataFrame(matchwise_data_cursor) matchwise_dataframe.drop(columns=["_id"], inplace=True) matchwise_dataframe.rename(columns={"index": "match_number"}, inplace=True) - # matchwise_dataframe["margin_runs"].astype(int) - matchwise_dataframe.sort_values(by=["match_number"], inplace=True) + matchwise_dataframe.sort_values(by=["date", "match_id"], inplace=True) + matchwise_dataframe["match_number"] = range(1, len(matchwise_dataframe) + 1) return matchwise_dataframe @property @@ -73,18 +74,12 @@ def _convert_dataframe_to_csv_and_upload_to_s3(self, dataframe: pd.DataFrame, fi logger.error(f"Failed to upload '{filename}' to S3: {str(e)}", exc_info=True) raise + +@exception_handler # noqa: Vulture def handler(_, __): # noqa: Vulture - logger.info("Lambda function invoked.") - try: - dataset_preparation_handler = DatasetPreparationHandler() - dataset_preparation_handler.prepare_dataset() - return { - 'statusCode': 200, - 'body': 'Dataset successfully processed and uploaded.' - } - except Exception as e: # pylint: disable=broad-exception-caught - logger.error(f"Lambda function error: {str(e)}", exc_info=True) - return { - 'statusCode': 500, - 'body': f"Error processing dataset: {str(e)}" - } + """ + Lambda function handler to convert MongoDB data to CSV and upload to S3. + """ + dataset_preparation_handler = DatasetPreparationHandler() + dataset_preparation_handler.prepare_dataset() + return "Datasets prepared and uploaded successfully."