17 | 17 |
18 | 18 | logger = logging.getLogger(__name__)
19 | 19 |
| 20 | +
| 21 | +def safe_task_ready(task, max_retries=3, retry_delay=0.5):
| 22 | +    """
| 23 | +    Safely check if a Celery task is ready with retry logic for Redis connection errors.
| 24 | +
| 25 | +    Celery's task.ready() uses its internal Redis connection which can get reset.
| 26 | +    This wrapper adds retry logic to handle transient connection failures.
| 27 | +
| 28 | +    Args:
| 29 | +        task: Celery AsyncResult object
| 30 | +        max_retries: Number of retry attempts
| 31 | +        retry_delay: Delay between retries in seconds
| 32 | +
| 33 | +    Returns:
| 34 | +        bool: True if task is ready, False if not ready or on persistent error
| 35 | +    """
| 36 | +    import redis
| 37 | +
| 38 | +    for attempt in range(max_retries):
| 39 | +        try:
| 40 | +            return task.ready()
| 41 | +        except (redis.ConnectionError, redis.TimeoutError, ConnectionResetError, AttributeError) as e:
| 42 | +            if attempt < max_retries - 1:
| 43 | +                logger.warning(f"Redis connection error checking task status (attempt {attempt + 1}/{max_retries}): {e}")
| 44 | +                time.sleep(retry_delay)
| 45 | +            else:
| 46 | +                logger.error(f"Failed to check task status after {max_retries} attempts: {e}")
| 47 | +                # Return False to keep task in active list and retry later
| 48 | +                return False
| 49 | +        except Exception as e:
| 50 | +            logger.error(f"Unexpected error checking task status: {e}")
| 51 | +            return False
| 52 | +    return False
| 53 | +
| 54 | +
| 55 | +def safe_task_get(task, timeout=1, max_retries=3, retry_delay=0.5):
| 56 | +    """
| 57 | +    Safely get a Celery task result with retry logic for Redis connection errors.
| 58 | +
| 59 | +    Args:
| 60 | +        task: Celery AsyncResult object
| 61 | +        timeout: Timeout in seconds for getting the result
| 62 | +        max_retries: Number of retry attempts
| 63 | +        retry_delay: Delay between retries in seconds
| 64 | +
| 65 | +    Returns:
| 66 | +        The task result; re-raises the last exception if all retries fail
| 67 | +    """
| 68 | +    import redis
| 69 | +
| 70 | +    for attempt in range(max_retries):
| 71 | +        try:
| 72 | +            return task.get(timeout=timeout)
| 73 | +        except (redis.ConnectionError, redis.TimeoutError, ConnectionResetError, AttributeError) as e:
| 74 | +            if attempt < max_retries - 1:
| 75 | +                logger.warning(f"Redis connection error getting task result (attempt {attempt + 1}/{max_retries}): {e}")
| 76 | +                time.sleep(retry_delay)
| 77 | +            else:
| 78 | +                logger.error(f"Failed to get task result after {max_retries} attempts: {e}")
| 79 | +                raise
| 80 | +        except Exception:
| 81 | +            raise
| 82 | +    return None
| 83 | +
20 | 84 | class MaintenanceService:
21 | 85 |     """Service for maintenance operations like cleanup and file monitoring"""
22 | 86 |
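For reviewers who want to poke at the retry behavior in isolation, here is a minimal standalone sketch. It repeats the `safe_task_ready` body from this diff so the snippet runs on its own, and uses a hypothetical `FlakyTask` stand-in (not part of the codebase) whose `ready()` raises `redis.ConnectionError` a configurable number of times before succeeding:

```python
import logging
import time

import redis  # redis-py; provides redis.ConnectionError / redis.TimeoutError

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def safe_task_ready(task, max_retries=3, retry_delay=0.5):
    # Same logic as the wrapper added in this diff.
    for attempt in range(max_retries):
        try:
            return task.ready()
        except (redis.ConnectionError, redis.TimeoutError, ConnectionResetError, AttributeError) as e:
            if attempt < max_retries - 1:
                logger.warning(f"Redis connection error (attempt {attempt + 1}/{max_retries}): {e}")
                time.sleep(retry_delay)
            else:
                logger.error(f"Failed to check task status after {max_retries} attempts: {e}")
                return False
        except Exception as e:
            logger.error(f"Unexpected error checking task status: {e}")
            return False
    return False


class FlakyTask:
    """Hypothetical stand-in for a Celery AsyncResult backed by a flaky Redis connection."""

    def __init__(self, failures):
        self.failures = failures  # how many calls fail before ready() succeeds

    def ready(self):
        if self.failures > 0:
            self.failures -= 1
            raise redis.ConnectionError("Connection reset by peer")
        return True


print(safe_task_ready(FlakyTask(failures=2)))  # True  -> recovered on the third attempt
print(safe_task_ready(FlakyTask(failures=5)))  # False -> retries exhausted, caller keeps polling
```

With the defaults, a persistently failing check adds roughly (max_retries - 1) × retry_delay = 1 second per task, which is worth keeping in mind when large batches are being polled.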
@@ -326,9 +390,9 @@ def _run_cleanup(self, cleanup_id, file_paths=None, schedule_id=None): |
326 | 390 |             # Collect completed tasks and free up slots
327 | 391 |             still_active = []
328 | 392 |             for task in active_tasks:
329 | | -                if task.ready():
| 393 | +                if safe_task_ready(task):
330 | 394 |                     try:
331 | | -                        check_result = task.get(timeout=1)
| 395 | +                        check_result = safe_task_get(task, timeout=1)
332 | 396 |                         total_files_processed += 1
333 | 397 |
334 | 398 |                         if not check_result.get('exists'):
@@ -717,9 +781,9 @@ def _run_file_changes_check(self, check_id: str, file_paths=None, schedule_id=None):
717 | 781 |             still_active = []
718 | 782 |             for task_info in active_tasks:
719 | 783 |                 task, file_size = task_info['task'], task_info['size']
720 | | -                if task.ready():
| 784 | +                if safe_task_ready(task):
721 | 785 |                     try:
722 | | -                        result = task.get(timeout=1)
| 786 | +                        result = safe_task_get(task, timeout=1)
723 | 787 |                         total_files_processed += 1
724 | 788 |
725 | 789 |                         # Update last integrity check timestamp for this file
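The hunks above show only the changed lines, so for context the sketch below is a hypothetical `drain_ready_tasks` helper (not part of this PR) illustrating the intended interplay: a task whose status check keeps failing is reported as not ready and stays in the active list, to be re-checked on the next polling pass. It assumes `safe_task_ready`, `safe_task_get`, and the module-level `logger` from this diff are in scope.

```python
def drain_ready_tasks(active_tasks, timeout=1):
    """Consume finished tasks; return (results, still_active) for the next polling pass."""
    results = []
    still_active = []
    for task in active_tasks:
        if safe_task_ready(task):
            try:
                # safe_task_get retries transient Redis errors and re-raises if they persist.
                results.append(safe_task_get(task, timeout=timeout))
            except Exception as e:
                logger.error(f"Dropping task after persistent result errors: {e}")
        else:
            # Not finished yet, or the status check kept failing: poll it again later.
            still_active.append(task)
    return results, still_active
```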