Skip to content

Bug: Lambda local invoke fails on missing module but not when deployed #8325

@rupe120

Description

@rupe120

Description:

When I try to locally invoke a particular lambda I receive the error:

[ERROR] Runtime.ImportModuleError: Unable to import module 'index': No module named 'regex._regex'
Traceback (most recent call last):
09 Oct 2025 17:53:53,221 [ERROR] (rapid) Init failed error=Runtime exited with error: exit status 1 InvokeID=
09 Oct 2025 17:53:53,224 [ERROR] (rapid) Invoke failed error=Runtime exited with error: exit status 1 InvokeID=caeb871a-4519-48b9-809a-351ebb7a71d9
09 Oct 2025 17:53:53,225 [ERROR] (rapid) Invoke DONE failed: Sandbox.Failure
{"errorMessage": "Unable to import module 'index': No module named 'regex._regex'", "errorType": "Runtime.ImportModuleError", "requestId": "", "stackTrace": []}

When I look in the asset folder referenced in the debug output I see the regex library.

cdk.out/asset.d20346a9df67baa95ee488ff2aa746cd84c0bab186bf47cdf2c4c3565a0aece2

Image

Steps to reproduce:

I'm not certain of the exact steps needed to reproduce the issue, but the general setup is:

requirements.txt

textstat>=0.7.3

CDK Lambda definition

        # StepReads processor Lambda: Python 3.12 function whose code asset is
        # built by Docker bundling from src-backend/stepreads-processor-lambda.
        stepreads_processor_lambda = _lambda.Function(
            self, "StepReadsProcessorLambda",
            function_name=f"{props.app_config.name}-stepreads-processor-lambda-{props.env_config.name}",
            runtime=_lambda.Runtime.PYTHON_3_12,
            # Entry point is the `handler` function of the `index` module.
            handler="index.handler",
            code=_lambda.Code.from_asset(
                "src-backend/stepreads-processor-lambda",
                # Bundling runs inside the runtime's bundling image: it installs
                # requirements.txt into /asset-output and then copies the source
                # directory into the same output folder.
                # NOTE(review): no `platform` is set here and no `architecture`
                # is set on the Function. On an arm64 host the native wheels
                # (e.g. regex's `_regex` extension) may be built for arm64 while
                # the function runs as x86_64, which would produce
                # "No module named 'regex._regex'" — confirm against the host
                # and function architecture.
                bundling=cdk.BundlingOptions(
                    image=_lambda.Runtime.PYTHON_3_12.bundling_image,
                    command=[
                        "bash", "-c",
                        "pip install -r requirements.txt -t /asset-output && cp -au . /asset-output"
                    ]
                )
            ),
            timeout=cdk.Duration.seconds(300),  # 5 minutes for complete pipeline with retries
            memory_size=1024,  # Increased for text generation workload
            # Environment passed to the function. Resource names/ARNs come from
            # the storage stack and app config; numeric settings are wrapped in
            # str() before being passed.
            environment={
                "ACCOUNT_ID": props.env_config.account,
                "DYNAMODB_TABLE_NAME": props.storage_stack.dynamodb_table.table_name,
                "DYNAMODB_TABLE_LEXILE_GSI_NAME": props.storage_stack.dynamodb_table_lexile_gsi_name,
                "INPUT_BUCKET_NAME": props.storage_stack.input_bucket.bucket_name,
                "OUTPUT_BUCKET_NAME": props.storage_stack.output_bucket.bucket_name,
                "VECTOR_BUCKET_NAME": props.storage_stack.vector_bucket.vector_bucket_name,
                "GRADED_TEXT_VECTOR_INDEX_NAME": props.storage_stack.graded_text_vector_index.index_name,
                "BEDROCK_EMBEDDING_MODEL_ID": props.app_config.bedrock.embedding_model_id,
                "BEDROCK_TEXT_GENERATION_MODEL_ID": props.app_config.bedrock.text_generation_model_id,
                "BEDROCK_TEXT_GENERATION_INFERENCE_PROFILE_ARN": props.app_config.bedrock.text_generation_inference_profile_arn,
                # Same source value (default_retry_count) as DEFAULT_RETRY_COUNT below.
                "STEPREADS_MAX_RETRY_COUNT": str(props.env_config.stepreads.default_retry_count),  # Configurable retry count
                "STEPREADS_FKG_VALIDATION_TOLERANCE": str(props.env_config.stepreads.default_fkg_validation_tolerance),  # Allow ±1.0 grade level difference (more lenient)
                "STEPREADS_SNS_TOPIC_ARN": props.storage_stack.stepreads_notifications_topic.topic_arn,
                "DEFAULT_PCT_FOR_TARGET_LEXILE_STEP": str(props.env_config.stepreads.default_pct_for_target_lexile_step),
                "DEFAULT_TARGET_GRADE_MOVE": str(props.env_config.stepreads.default_target_grade_move),
                "DEFAULT_SIMILARITY_THRESHOLD": str(props.env_config.stepreads.default_similarity_threshold),
                "DEFAULT_RETRY_COUNT": str(props.env_config.stepreads.default_retry_count),
            },
            # Log group is supplied by the storage stack.
            log_group=props.storage_stack.stepreads_processor_lambda_log_group
        )

The initial lambda code looks like this:

import os

# Set NLTK_DATA BEFORE any other import runs, so the variable is already in
# the environment when the libraries imported below are loaded.
# NOTE(review): presumably /tmp is used because it is the writable location
# in the Lambda environment — confirm.
os.environ['NLTK_DATA'] = '/tmp/nltk_data'

import json
import traceback
import boto3
# NOTE(review): this import chain is presumably what pulls in `regex`
# (the module reported missing as `regex._regex`) — confirm which package
# depends on it.
import textstat
from datetime import datetime, timezone
from typing import Dict, List, Any, Optional, Tuple

# Replace NLTK's data search path with the single /tmp location configured above.
# NOTE(review): requirements.txt as shown lists only textstat; nltk is
# presumably installed as a transitive dependency — confirm.
import nltk
nltk.data.path = ['/tmp/nltk_data']

# AWS clients are created once at import time and held as module-level globals.
s3 = boto3.client('s3')
s3vectors = boto3.client('s3vectors')
dynamodb = boto3.resource('dynamodb')
bedrock = boto3.client('bedrock-runtime')
sns = boto3.client('sns')

# Runtime configuration, read from the process environment at import time.
# A setting with no explicit default is None when its variable is unset.
DYNAMODB_TABLE_NAME = os.getenv('DYNAMODB_TABLE_NAME')
DYNAMODB_TABLE_LEXILE_GSI_NAME = os.getenv('DYNAMODB_TABLE_LEXILE_GSI_NAME')
OUTPUT_BUCKET_NAME = os.getenv('OUTPUT_BUCKET_NAME')
VECTOR_BUCKET_NAME = os.getenv('VECTOR_BUCKET_NAME')
GRADED_TEXT_VECTOR_INDEX_NAME = os.getenv('GRADED_TEXT_VECTOR_INDEX_NAME')
BEDROCK_EMBEDDING_MODEL_ID = os.getenv('BEDROCK_EMBEDDING_MODEL_ID', 'amazon.titan-embed-text-v2:0')
BEDROCK_TEXT_GENERATION_MODEL_ID = os.getenv('BEDROCK_TEXT_GENERATION_MODEL_ID')
BEDROCK_TEXT_GENERATION_INFERENCE_PROFILE_ARN = os.getenv('BEDROCK_TEXT_GENERATION_INFERENCE_PROFILE_ARN')
STEPREADS_SNS_TOPIC_ARN = os.getenv('STEPREADS_SNS_TOPIC_ARN')
# Lexile percentage for the target lexile step (range of lexile values
# considered for similar texts). Kept as the raw string; defaults to '0.2'.
DEFAULT_PCT_FOR_TARGET_LEXILE_STEP = os.getenv('DEFAULT_PCT_FOR_TARGET_LEXILE_STEP', '0.2')

# Retry limit, parsed as an int; 5 when the variable is unset.
MAX_RETRY_COUNT = int(os.getenv('STEPREADS_MAX_RETRY_COUNT', '5'))

# Grade tolerance, parsed as a float; 1.0 when the variable is unset.
FKG_VALIDATION_TOLERANCE = float(os.getenv('STEPREADS_FKG_VALIDATION_TOLERANCE', '1.0'))

# Fixed constants (status files live under this prefix in the output bucket).
STATUS_FILE_PREFIX = "stepreads/status"
EMBEDDING_DIMENSIONS = 1024
MAX_SIMILAR_TEXTS = 5

def handler(event, context):
    """
    Entry point of the StepReads processor Lambda (SQS-triggered processing).

    Prints a start-up banner: the incoming event serialized as JSON, the
    memory limit and remaining execution time (whole seconds, printed under
    the "Timeout" label) taken from ``context``, and the number of entries
    under the event's ``Records`` key (0 when the key is absent).
    """
    divider = "=" * 60

    print("🚀 STEPREADS PROCESSOR LAMBDA STARTED")
    print(divider)

    event_json = json.dumps(event)
    print(f"Event: {event_json}")

    memory_mb = context.memory_limit_in_mb
    remaining_seconds = context.get_remaining_time_in_millis() // 1000
    print(f"📊 Lambda Info: Memory={memory_mb}MB, Timeout={remaining_seconds}s")

    records = event.get('Records', [])
    print(f"📦 Records to process: {len(records)}")
    print(divider)

Observed result:

sam-debug-out.txt

Expected result:

Execution of code in my handler

Additional environment details (Ex: Windows, Mac, Amazon Linux etc)

{
  "version": "1.144.0",
  "system": {
    "python": "3.11.10",
    "os": "macOS-15.6.1-arm64-arm-64bit"
  },
  "additional_dependencies": {
    "docker_engine": "28.4.0",
    "aws_cdk": "2.1029.1 (build b45b1ab)",
    "terraform": "1.13.1"
  },
  "available_beta_feature_env_vars": [
    "SAM_CLI_BETA_FEATURES",
    "SAM_CLI_BETA_BUILD_PERFORMANCE",
    "SAM_CLI_BETA_TERRAFORM_SUPPORT",
    "SAM_CLI_BETA_PACKAGE_PERFORMANCE",
    "SAM_CLI_BETA_RUST_CARGO_LAMBDA"
  ]
}

Metadata

Metadata

Assignees

No one assigned

    Labels

    stage/needs-triage — Automatically applied to new issues and PRs, indicating they haven't been looked at.

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions