Skip to content

Commit 4c2fb08

Browse files
Enhance script with log download status and timestamp stripping
This commit adds two main enhancements to the `print_workflow_run_errors.py` script:

1. **Log Download Status (stderr)**:
   - Prints messages to stderr indicating the progress of log downloads (e.g., "INFO: Downloading log X/Y for job 'job_name'...").
   - Prints a summary to stderr after all jobs are processed, showing how many logs were successfully fetched and processed (e.g., "INFO: Processed logs for S/T targeted failed jobs.").
   - Includes a warning on stderr if a specific job's log fails to download.

2. **Timestamp Stripping (stdout)**:
   - Implemented a function `strip_initial_timestamp` that uses a regex to remove ISO 8601-like timestamps from the beginning of log lines.
   - This stripping is applied to all log lines (from specific step segments or full job log fallbacks) before they are further processed by `grep` or printed as the "last N lines".

This makes the logs cleaner and potentially easier for other tools or users to parse.
1 parent bcf6245 commit 4c2fb08

File tree

1 file changed

+53
-32
lines changed

1 file changed

+53
-32
lines changed

scripts/print_workflow_run_errors.py

Lines changed: 53 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,13 @@ def requests_retry_session(retries=RETRIES,
6464
session.mount('https://', adapter)
6565
return session
6666

67+
# Regex to match ISO 8601 timestamps like "2023-10-27T18:30:59.1234567Z " or "2023-10-27T18:30:59Z "
68+
TIMESTAMP_REGEX = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?Z\s*")
69+
70+
def strip_initial_timestamp(line: str) -> str:
71+
"""Removes an ISO 8601-like timestamp from the beginning of a line if present."""
72+
return TIMESTAMP_REGEX.sub("", line)
73+
6774

6875
def get_current_branch_name():
6976
"""Gets the current git branch name."""
@@ -307,14 +314,24 @@ def parse_repo_url_arg(url_string):
307314

308315
print(f"\n# Detailed Logs for Failed Jobs (matching pattern '{args.job_pattern}') for Workflow Run ID: {run['id']} ([Run Link]({run.get('html_url', 'No URL')}))\n")
309316

310-
for job in failed_jobs_matching_criteria:
317+
total_failed_jobs_to_process = len(failed_jobs_matching_criteria)
318+
successful_log_fetches = 0
319+
320+
for idx, job in enumerate(failed_jobs_matching_criteria):
321+
sys.stderr.write(f"INFO: Downloading log {idx+1}/{total_failed_jobs_to_process} for job '{job['name']}' (ID: {job['id']})...\n")
322+
job_logs = get_job_logs(args.token, job['id'])
323+
311324
print(f"\n## Job: {job['name']} (ID: {job['id']}) - FAILED")
312325
print(f"[Job URL]({job.get('html_url', 'N/A')})\n")
313326

314-
job_logs = get_job_logs(args.token, job['id'])
315327
if not job_logs:
316-
print("Could not retrieve logs for this job.")
317-
continue
328+
print("**Could not retrieve logs for this job.**")
329+
# Also print to stderr if it's a critical failure to fetch
330+
sys.stderr.write(f"WARNING: Failed to retrieve logs for job '{job['name']}' (ID: {job['id']}).\n")
331+
continue # Skip to the next job
332+
333+
successful_log_fetches += 1
334+
# If logs were fetched, proceed to process them (already existing logic)
318335

319336
failed_steps_details = []
320337
if job.get('steps'):
@@ -324,14 +341,17 @@ def parse_repo_url_arg(url_string):
324341

325342
if not failed_steps_details: # No specific failed steps found in API, but job is failed
326343
print("\n**Note: No specific failed steps were identified in the job's metadata, but the job itself is marked as failed.**")
327-
log_lines_for_job_fallback = job_logs.splitlines()
344+
345+
# Apply timestamp stripping to the full job log
346+
stripped_log_lines_fallback = [strip_initial_timestamp(line) for line in job_logs.splitlines()]
347+
328348
if args.grep_pattern:
329349
print(f"Displaying grep results for pattern '{args.grep_pattern}' with context {args.grep_context} from **entire job log**:")
330350
print("\n```log")
331351
try:
332352
process = subprocess.run(
333353
['grep', '-E', f"-C{args.grep_context}", args.grep_pattern],
334-
input="\n".join(log_lines_for_job_fallback), text=True, capture_output=True, check=False
354+
input="\n".join(stripped_log_lines_fallback), text=True, capture_output=True, check=False
335355
)
336356
if process.returncode == 0: print(process.stdout.strip())
337357
elif process.returncode == 1: print(f"No matches found for pattern '{args.grep_pattern}' in entire job log.")
@@ -342,7 +362,7 @@ def parse_repo_url_arg(url_string):
342362
else:
343363
print(f"Displaying last {args.log_lines} lines from **entire job log** as fallback:")
344364
print("\n```log")
345-
for line in log_lines_for_job_fallback[-args.log_lines:]:
365+
for line in stripped_log_lines_fallback[-args.log_lines:]: # Use stripped lines
346366
print(line)
347367
print("```")
348368
print("\n---") # Horizontal rule
@@ -364,52 +384,52 @@ def parse_repo_url_arg(url_string):
364384
step_start_pattern = re.compile(r"^##\[group\](?:Run\s+|Setup\s+|Complete\s+)?.*?" + escaped_step_name, re.IGNORECASE)
365385
step_end_pattern = re.compile(r"^##\[endgroup\]")
366386

367-
current_step_log_segment = []
368-
capturing_for_failed_step = False
369-
log_lines_for_job = job_logs.splitlines() # Split once per job
387+
# Get raw lines for the entire job first
388+
raw_log_lines_for_job = job_logs.splitlines()
370389

371-
# Try to find the specific step's log segment
372-
for line in log_lines_for_job:
390+
current_step_raw_log_segment_lines = [] # Stores raw lines of the isolated step
391+
capturing_for_failed_step = False
392+
for line in raw_log_lines_for_job: # Iterate raw lines to find segment
373393
if step_start_pattern.search(line):
374394
capturing_for_failed_step = True
375-
current_step_log_segment = [line] # Start with the group line
395+
current_step_raw_log_segment_lines = [line]
376396
continue
377397
if capturing_for_failed_step:
378-
current_step_log_segment.append(line)
398+
current_step_raw_log_segment_lines.append(line)
379399
if step_end_pattern.search(line):
380400
capturing_for_failed_step = False
381-
# Found the end of the targeted step's log
382-
break # Stop processing lines for this step (within this job's logs)
401+
break
383402

384-
log_to_process = ""
403+
# Determine which set of lines to process (isolated step or full job) and strip timestamps
404+
lines_to_process_stripped = []
385405
log_source_message = ""
386406

387-
if current_step_log_segment:
388-
log_to_process = "\n".join(current_step_log_segment)
407+
if current_step_raw_log_segment_lines:
408+
lines_to_process_stripped = [strip_initial_timestamp(line) for line in current_step_raw_log_segment_lines]
389409
log_source_message = f"Log for failed step '{step_name}'"
390410
else:
391-
log_to_process = "\n".join(log_lines_for_job) # Use the full job log as fallback
411+
# Fallback to full job log if specific step segment couldn't be isolated
412+
lines_to_process_stripped = [strip_initial_timestamp(line) for line in raw_log_lines_for_job]
392413
log_source_message = f"Could not isolate log for step '{step_name}'. Using entire job log"
393414

415+
log_content_for_processing = "\n".join(lines_to_process_stripped)
416+
394417
if args.grep_pattern:
395418
print(f"\n{log_source_message} (grep results for pattern `{args.grep_pattern}` with context {args.grep_context}):\n")
396419
print("```log")
397420
try:
398-
# Using subprocess to call grep
399-
# Pass log_to_process as stdin to grep
400421
process = subprocess.run(
401422
['grep', '-E', f"-C{args.grep_context}", args.grep_pattern],
402-
input=log_to_process,
423+
input=log_content_for_processing, # Use stripped content
403424
text=True,
404425
capture_output=True,
405-
check=False # Do not throw exception on non-zero exit (e.g. no match)
426+
check=False
406427
)
407-
if process.returncode == 0: # Match found
428+
if process.returncode == 0:
408429
print(process.stdout.strip())
409-
elif process.returncode == 1: # No match found
430+
elif process.returncode == 1:
410431
print(f"No matches found for pattern '{args.grep_pattern}' in this log segment.")
411-
else: # Grep error
412-
# Print error within the log block if possible, or as a note if it's too disruptive
432+
else:
413433
print(f"Grep command failed with error code {process.returncode}. Stderr:\n{process.stderr}")
414434
except FileNotFoundError:
415435
sys.stderr.write("Error: 'grep' command not found. Please ensure it is installed and in your PATH to use --grep-pattern.\n")
@@ -419,12 +439,10 @@ def parse_repo_url_arg(url_string):
419439
print("Skipping log display due to an error with grep.")
420440
print("```")
421441
else:
422-
# Default behavior: print last N lines
423442
print(f"\n{log_source_message} (last {args.log_lines} lines):\n")
424443
print("```log")
425-
# current_step_log_segment is a list of lines, log_lines_for_job is also a list of lines
426-
lines_to_print_from = current_step_log_segment if current_step_log_segment else log_lines_for_job
427-
for log_line in lines_to_print_from[-args.log_lines:]:
444+
# Print from the already stripped lines (lines_to_process_stripped)
445+
for log_line in lines_to_process_stripped[-args.log_lines:]:
428446
print(log_line)
429447
print("```")
430448

@@ -433,6 +451,9 @@ def parse_repo_url_arg(url_string):
433451

434452
print(f"\n---") # Horizontal rule after all steps for a job
435453

454+
# Print final summary of log fetching to stderr
455+
sys.stderr.write(f"\nINFO: Processed logs for {successful_log_fetches}/{total_failed_jobs_to_process} targeted failed jobs.\n")
456+
436457

437458
def get_latest_workflow_run(token, workflow_name, branch_name):
438459
"""Fetches the most recent workflow run for a given workflow name and branch."""

0 commit comments

Comments (0)