-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Open
Labels
stage/needs-triage: Automatically applied to new issues and PRs, indicating they haven't been looked at.
Description
Description:
When I try to locally invoke a particular lambda I receive the error:
[ERROR] Runtime.ImportModuleError: Unable to import module 'index': No module named 'regex._regex'
Traceback (most recent call last):
09 Oct 2025 17:53:53,221 [ERROR] (rapid) Init failed error=Runtime exited with error: exit status 1 InvokeID=
09 Oct 2025 17:53:53,224 [ERROR] (rapid) Invoke failed error=Runtime exited with error: exit status 1 InvokeID=caeb871a-4519-48b9-809a-351ebb7a71d9
09 Oct 2025 17:53:53,225 [ERROR] (rapid) Invoke DONE failed: Sandbox.Failure
{"errorMessage": "Unable to import module 'index': No module named 'regex._regex'", "errorType": "Runtime.ImportModuleError", "requestId": "", "stackTrace": []}
When I look in the asset folder referenced in the debug output, I can see that the regex library is present:
cdk.out/asset.d20346a9df67baa95ee488ff2aa746cd84c0bab186bf47cdf2c4c3565a0aece2

Steps to reproduce:
I'm not certain of the exact steps needed to reproduce the issue, but the general setup is as follows:
requirements.txt
textstat>=0.7.3
CDK Lambda definition
# Docker bundling for the function asset: install the pinned requirements into
# /asset-output, then copy the handler sources next to them.
# NOTE(review): no bundling `platform` and no function `architecture` are set
# anywhere in this definition. On an arm64 host the bundling container may
# install arm64-only wheels (compiled extensions such as regex's `_regex`) --
# confirm that this matches the architecture the function actually runs on.
stepreads_processor_bundling = cdk.BundlingOptions(
    image=_lambda.Runtime.PYTHON_3_12.bundling_image,
    command=[
        "bash",
        "-c",
        "pip install -r requirements.txt -t /asset-output && cp -au . /asset-output",
    ],
)

# Environment variables handed to the function. Numeric settings are wrapped
# in str() because every value in this mapping is passed as a string.
stepreads_processor_environment = {
    "ACCOUNT_ID": props.env_config.account,
    # Storage resources
    "DYNAMODB_TABLE_NAME": props.storage_stack.dynamodb_table.table_name,
    "DYNAMODB_TABLE_LEXILE_GSI_NAME": props.storage_stack.dynamodb_table_lexile_gsi_name,
    "INPUT_BUCKET_NAME": props.storage_stack.input_bucket.bucket_name,
    "OUTPUT_BUCKET_NAME": props.storage_stack.output_bucket.bucket_name,
    "VECTOR_BUCKET_NAME": props.storage_stack.vector_bucket.vector_bucket_name,
    "GRADED_TEXT_VECTOR_INDEX_NAME": props.storage_stack.graded_text_vector_index.index_name,
    # Bedrock model selection
    "BEDROCK_EMBEDDING_MODEL_ID": props.app_config.bedrock.embedding_model_id,
    "BEDROCK_TEXT_GENERATION_MODEL_ID": props.app_config.bedrock.text_generation_model_id,
    "BEDROCK_TEXT_GENERATION_INFERENCE_PROFILE_ARN": props.app_config.bedrock.text_generation_inference_profile_arn,
    # StepReads tuning knobs, all sourced from the per-environment config
    "STEPREADS_MAX_RETRY_COUNT": str(props.env_config.stepreads.default_retry_count),
    "STEPREADS_FKG_VALIDATION_TOLERANCE": str(props.env_config.stepreads.default_fkg_validation_tolerance),
    "STEPREADS_SNS_TOPIC_ARN": props.storage_stack.stepreads_notifications_topic.topic_arn,
    "DEFAULT_PCT_FOR_TARGET_LEXILE_STEP": str(props.env_config.stepreads.default_pct_for_target_lexile_step),
    "DEFAULT_TARGET_GRADE_MOVE": str(props.env_config.stepreads.default_target_grade_move),
    "DEFAULT_SIMILARITY_THRESHOLD": str(props.env_config.stepreads.default_similarity_threshold),
    # Same source value as STEPREADS_MAX_RETRY_COUNT above
    "DEFAULT_RETRY_COUNT": str(props.env_config.stepreads.default_retry_count),
}

# The StepReads processor function itself (Python 3.12, entry point index.handler).
stepreads_processor_lambda = _lambda.Function(
    self,
    "StepReadsProcessorLambda",
    function_name=f"{props.app_config.name}-stepreads-processor-lambda-{props.env_config.name}",
    runtime=_lambda.Runtime.PYTHON_3_12,
    handler="index.handler",
    code=_lambda.Code.from_asset(
        "src-backend/stepreads-processor-lambda",
        bundling=stepreads_processor_bundling,
    ),
    # 300 s = 5 minutes; per the original author, budgeted for the complete
    # pipeline including retries.
    timeout=cdk.Duration.seconds(300),
    # Per the original author, raised for the text generation workload.
    memory_size=1024,
    environment=stepreads_processor_environment,
    log_group=props.storage_stack.stepreads_processor_lambda_log_group,
)
The initial lambda code looks like this:
# Module bootstrap: imports interleaved with NLTK data-path configuration.
# The ordering below is deliberate -- do not regroup the imports.
import os
# Set NLTK_DATA in the process environment BEFORE the remaining imports run,
# so the variable is already present when `textstat` and `nltk` are imported.
# NOTE(review): presumably /tmp is chosen because it is the writable location
# inside the Lambda sandbox -- confirm.
os.environ['NLTK_DATA'] = '/tmp/nltk_data'
import json
import traceback
import boto3
import textstat
from datetime import datetime, timezone
from typing import Dict, List, Any, Optional, Tuple
# Point nltk's data search path at the same directory. This is an assignment,
# not an append: the list is replaced with this single entry.
import nltk
nltk.data.path = ['/tmp/nltk_data']
# AWS service handles, constructed once when this module is imported.
s3 = boto3.client("s3")
s3vectors = boto3.client("s3vectors")
dynamodb = boto3.resource("dynamodb")  # resource-style handle; the others are clients
bedrock = boto3.client("bedrock-runtime")
sns = boto3.client("sns")
# ---------------------------------------------------------------------------
# Settings read from the process environment at import time.
# Names without a fallback resolve to None when the variable is absent.
# ---------------------------------------------------------------------------
DYNAMODB_TABLE_NAME = os.getenv("DYNAMODB_TABLE_NAME")
DYNAMODB_TABLE_LEXILE_GSI_NAME = os.getenv("DYNAMODB_TABLE_LEXILE_GSI_NAME")
OUTPUT_BUCKET_NAME = os.getenv("OUTPUT_BUCKET_NAME")
VECTOR_BUCKET_NAME = os.getenv("VECTOR_BUCKET_NAME")
GRADED_TEXT_VECTOR_INDEX_NAME = os.getenv("GRADED_TEXT_VECTOR_INDEX_NAME")
BEDROCK_EMBEDDING_MODEL_ID = os.getenv("BEDROCK_EMBEDDING_MODEL_ID", "amazon.titan-embed-text-v2:0")
BEDROCK_TEXT_GENERATION_MODEL_ID = os.getenv("BEDROCK_TEXT_GENERATION_MODEL_ID")
BEDROCK_TEXT_GENERATION_INFERENCE_PROFILE_ARN = os.getenv("BEDROCK_TEXT_GENERATION_INFERENCE_PROFILE_ARN")
STEPREADS_SNS_TOPIC_ARN = os.getenv("STEPREADS_SNS_TOPIC_ARN")

# Lexile percentage used for the target lexile step (fallback "0.2").
# Kept as a string here: no numeric conversion is applied at this point.
DEFAULT_PCT_FOR_TARGET_LEXILE_STEP = os.getenv("DEFAULT_PCT_FOR_TARGET_LEXILE_STEP", "0.2")

# Retry budget, parsed as an int (fallback 5).
MAX_RETRY_COUNT = int(os.getenv("STEPREADS_MAX_RETRY_COUNT", "5"))

# FKG validation tolerance, parsed as a float (fallback 1.0).
FKG_VALIDATION_TOLERANCE = float(os.getenv("STEPREADS_FKG_VALIDATION_TOLERANCE", "1.0"))

# ---------------------------------------------------------------------------
# Fixed constants (organized structure in the output bucket, sizing limits).
# ---------------------------------------------------------------------------
STATUS_FILE_PREFIX = "stepreads/status"
EMBEDDING_DIMENSIONS = 1024
MAX_SIMILAR_TEXTS = 5
def handler(event, context):
    """
    Main Lambda handler for SQS-triggered StepReads processing.

    This excerpt covers only the start-up logging: it prints a banner, the
    raw event, basic invocation limits and the number of records received.

    Args:
        event: Invocation payload. It is serialized with ``json.dumps`` and
            queried with ``event.get('Records', [])``, so it must be a
            JSON-serializable mapping; a missing ``Records`` key counts as
            zero records.
        context: Invocation context object. Only ``memory_limit_in_mb`` and
            ``get_remaining_time_in_millis()`` are read here.

    Returns:
        None (nothing is returned by the portion of the handler shown).
    """
    print("🚀 STEPREADS PROCESSOR LAMBDA STARTED")
    print("=" * 60)
    print(f"Event: {json.dumps(event)}")
    # Remaining time is reported in whole seconds (integer division of ms).
    print(f"📊 Lambda Info: Memory={context.memory_limit_in_mb}MB, Timeout={context.get_remaining_time_in_millis()//1000}s")
    print(f"📦 Records to process: {len(event.get('Records', []))}")
    print("=" * 60)
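Observed result:
The local invocation fails during init with the Runtime.ImportModuleError shown in the description above (No module named 'regex._regex').
Expected result:
The code in my handler is executed.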
Additional environment details (Ex: Windows, Mac, Amazon Linux etc)
{
"version": "1.144.0",
"system": {
"python": "3.11.10",
"os": "macOS-15.6.1-arm64-arm-64bit"
},
"additional_dependencies": {
"docker_engine": "28.4.0",
"aws_cdk": "2.1029.1 (build b45b1ab)",
"terraform": "1.13.1"
},
"available_beta_feature_env_vars": [
"SAM_CLI_BETA_FEATURES",
"SAM_CLI_BETA_BUILD_PERFORMANCE",
"SAM_CLI_BETA_TERRAFORM_SUPPORT",
"SAM_CLI_BETA_PACKAGE_PERFORMANCE",
"SAM_CLI_BETA_RUST_CARGO_LAMBDA"
]
}
Metadata
Metadata
Assignees
Labels
stage/needs-triage: Automatically applied to new issues and PRs, indicating they haven't been looked at.