Commit ea66f3b

Author: Vitaliy Zakaznikov (committed)

Fixing #61

1 parent 58fcbe9 commit ea66f3b

File tree: 2 files changed, +101 -73 lines


testflows/github/hetzner/runners/metrics.py

Lines changed: 56 additions & 55 deletions
@@ -17,6 +17,7 @@
 import logging
 from datetime import datetime

+from github.WorkflowRun import WorkflowRun, WorkflowRunJob
 from prometheus_client import Counter, Gauge, Histogram, Info
 from .estimate import get_server_price
 from .constants import standby_server_name_prefix, recycle_server_name_prefix
@@ -656,7 +657,7 @@ def update_runners(
     ).set(count)


-def update_jobs(workflow_runs):
+def update_jobs(run_jobs: list[(WorkflowRun, WorkflowRunJob)]):
     """Update all job-related metrics."""
     queued_count = 0
     running_count = 0
@@ -670,66 +671,66 @@ def update_jobs(workflow_runs):
     RUNNING_JOB_LABELS._metrics.clear()
     RUNNING_JOB_TIME._metrics.clear()

-    for run in workflow_runs:
-        for job in run.jobs():
-            # Normalize job status
-            status = normalize_status(job)
+    for run, job in run_jobs:
+
+        # Normalize job status
+        status = normalize_status(job)
+
+        job_info = {
+            "name": job.name,
+            "workflow_name": run.name,
+            "repository": run.repository.full_name,
+            "status": status,
+            "queued_at": job.raw_data.get("started_at", ""),
+            "run_attempt": str(run.run_attempt),
+            "run_number": str(run.run_number),
+            "head_branch": run.head_branch or "",
+            "head_sha": run.head_sha or "",
+        }
+
+        if status == "queued":
+            queued_count += 1
+            # Track detailed job info
+            QUEUED_JOB_INFO.labels(job_id=str(job.id), run_id=str(run.id)).info(
+                job_info
+            )

-            job_info = {
-                "name": job.name,
-                "workflow_name": run.name,
-                "repository": run.repository.full_name,
-                "status": status,
-                "queued_at": job.raw_data.get("started_at", ""),
-                "run_attempt": str(run.run_attempt),
-                "run_number": str(run.run_number),
-                "head_branch": run.head_branch or "",
-                "head_sha": run.head_sha or "",
-            }
+            # Track job labels
+            for label in job.raw_data.get("labels", []):
+                QUEUED_JOB_LABELS.labels(
+                    job_id=str(job.id), run_id=str(run.id), label=label.lower()
+                ).set(1)

-            if status == "queued":
-                queued_count += 1
-                # Track detailed job info
-                QUEUED_JOB_INFO.labels(job_id=str(job.id), run_id=str(run.id)).info(
-                    job_info
+            # Track job wait time
+            started_at = job.raw_data.get("started_at")
+            if started_at:
+                started_at = dateutil.parser.parse(started_at)
+                wait_time = current_time - started_at.timestamp()
+                QUEUED_JOB_WAIT_TIME.labels(job_id=str(job.id), run_id=str(run.id)).set(
+                    wait_time
                 )

-                # Track job labels
-                for label in job.raw_data.get("labels", []):
-                    QUEUED_JOB_LABELS.labels(
-                        job_id=str(job.id), run_id=str(run.id), label=label.lower()
-                    ).set(1)
-
-                # Track job wait time
-                started_at = job.raw_data.get("started_at")
-                if started_at:
-                    started_at = dateutil.parser.parse(started_at)
-                    wait_time = current_time - started_at.timestamp()
-                    QUEUED_JOB_WAIT_TIME.labels(
-                        job_id=str(job.id), run_id=str(run.id)
-                    ).set(wait_time)
-
-            elif status == "in_progress":
-                running_count += 1
-                # Track detailed job info
-                RUNNING_JOB_INFO.labels(job_id=str(job.id), run_id=str(run.id)).info(
-                    job_info
-                )
+        elif status == "in_progress":
+            running_count += 1
+            # Track detailed job info
+            RUNNING_JOB_INFO.labels(job_id=str(job.id), run_id=str(run.id)).info(
+                job_info
+            )

-                # Track job labels
-                for label in job.raw_data.get("labels", []):
-                    RUNNING_JOB_LABELS.labels(
-                        job_id=str(job.id), run_id=str(run.id), label=label.lower()
-                    ).set(1)
+            # Track job labels
+            for label in job.raw_data.get("labels", []):
+                RUNNING_JOB_LABELS.labels(
+                    job_id=str(job.id), run_id=str(run.id), label=label.lower()
+                ).set(1)

-                # Track job run time
-                started_at = job.raw_data.get("started_at")
-                if started_at:
-                    started_at = dateutil.parser.parse(started_at)
-                    run_time = current_time - started_at.timestamp()
-                    RUNNING_JOB_TIME.labels(job_id=str(job.id), run_id=str(run.id)).set(
-                        run_time
-                    )
+            # Track job run time
+            started_at = job.raw_data.get("started_at")
+            if started_at:
+                started_at = dateutil.parser.parse(started_at)
+                run_time = current_time - started_at.timestamp()
+                RUNNING_JOB_TIME.labels(job_id=str(job.id), run_id=str(run.id)).set(
+                    run_time
+                )

     QUEUED_JOBS.set(queued_count)
     RUNNING_JOBS.set(running_count)
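
With this change update_jobs() no longer iterates run.jobs() itself; callers pass pre-filtered (run, job) pairs instead of whole workflow runs. A minimal sketch of the new calling convention, assuming a PyGithub Repository object named repo and an illustrative with_label value (neither is part of this commit):

from testflows.github.hetzner.runners import metrics
from testflows.github.hetzner.runners.scale_up import filtered_run_jobs

# Collect runs that can still contain active jobs (same calls as in scale_up.py).
queued_runs = list(repo.get_workflow_runs(status="queued"))
in_progress_runs = list(repo.get_workflow_runs(status="in_progress"))

# Only (run, job) pairs that are queued or in progress and carry all of the
# with_label labels reach the metrics module; the label value here is made up.
metrics.update_jobs(
    filtered_run_jobs(queued_runs + in_progress_runs, with_label=["self-hosted"])
)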

testflows/github/hetzner/runners/scale_up.py

Lines changed: 45 additions & 18 deletions
@@ -353,6 +353,38 @@ def raise_exception(exc):
     raise exc


+def get_job_labels(job):
+    """Get job labels."""
+    return set([label.lower() for label in job.raw_data["labels"]])
+
+
+def job_matches_labels(job_labels, with_label):
+    """Check if job matches with_label criteria."""
+    if with_label is None:
+        return True
+
+    for label in with_label:
+        if not label.lower() in job_labels:
+            return (False, label)
+
+    return True
+
+
+def filtered_run_jobs(workflow_runs, with_label):
+    """Filter jobs to select only queued or in progress and match with_label criteria."""
+    run_jobs = []
+    for run in workflow_runs:
+        for job in run.jobs():
+            if job.status == "completed":
+                continue
+            if not (job.status == "in_progress" or job.status == "queued"):
+                continue
+            labels = get_job_labels(job)
+            if job_matches_labels(labels, with_label) is True:
+                run_jobs.append((run, job))
+    return run_jobs
+
+
 def create_server(
     hetzner_token: str,
     setup_worker_pool: ThreadPoolExecutor,
@@ -854,8 +886,10 @@ def create_runner_server(
                 in_progress_runs = list(
                     repo.get_workflow_runs(status="in_progress")
                 )
-                # Update job metrics with all runs
-                metrics.update_jobs(queued_runs + in_progress_runs)
+                # Update job metrics using only queued or in progress runs that match with_label criteria
+                metrics.update_jobs(
+                    filtered_run_jobs(queued_runs + in_progress_runs, with_label)
+                )
                 # For job processing, we'll use only queued runs
                 workflow_runs = queued_runs

@@ -952,10 +986,7 @@ def create_runner_server(
                 ):
                     pass

-                labels = set(
-                    [label.lower() for label in job.raw_data["labels"]]
-                )
-
+                labels = get_job_labels(job)
                 server_name = (
                     f"{server_name_prefix}{job.run_id}-{job.id}"
                 )
@@ -1008,18 +1039,14 @@ def create_runner_server(
                 ):
                     break

-                if with_label is not None:
-                    found_all_with_labels = True
-                    for label in with_label:
-                        if not label.lower() in labels:
-                            found_all_with_labels = False
-                            with Action(
-                                f"Skipping {job} with {labels} as it is missing label '{label}'",
-                                server_name=server_name,
-                                interval=interval,
-                            ):
-                                break
-                    if not found_all_with_labels:
+                result = job_matches_labels(labels, with_label)
+                if result is not True:
+                    _, missing_label = result
+                    with Action(
+                        f"Skipping {job} with {labels} as it is missing label '{missing_label}'",
+                        server_name=server_name,
+                        interval=interval,
+                    ):
                         continue

                 with Action(
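
The job_matches_labels() helper added above returns True on a match (or when with_label is None) and a (False, missing_label) tuple otherwise, which is why callers compare the result with "is True" / "is not True" rather than relying on truthiness. A short sketch of how that return value is consumed; the label values are made up:

from testflows.github.hetzner.runners.scale_up import job_matches_labels

# Labels in the form produced by get_job_labels(job); values are illustrative.
labels = {"self-hosted", "x64", "type-cpx31"}

result = job_matches_labels(labels, with_label=["self-hosted", "gpu"])
if result is not True:
    _, missing_label = result  # here missing_label == "gpu"
    print(f"skipping job, missing label '{missing_label}'")

# with_label=None disables filtering, so any set of labels matches.
assert job_matches_labels(labels, None) is True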
