Merge pull request #34 from Andrei-Constantin-Programmer/IBM-32-Add-other-metrics-before-after-completion

avimallick · web-flow · commit e5446cdb6ba0 · 2025-08-29T13:55:09.000+01:00
IBM-32: Add other metrics before and after completion
diff --git a/AntiPattern_Remediator/workflow/compute_metrics.py b/AntiPattern_Remediator/workflow/compute_metrics.py
@@ -0,0 +1,133 @@
+import json, os
+from pathlib import Path
+import lizard
+import re
+import javalang
+
+def calculate_nesting_depth(code: str) -> int:
+    """
+    Return the maximum control-structure nesting depth found in Java source.
+
+    ``code`` is parsed with ``javalang.parse.parse``, so it has to be a
+    complete compilation unit; any error raised by javalang while tokenizing
+    or parsing propagates to the caller.
+
+    Each ``if``, ``for``, ``while``, ``do``, ``try`` and ``switch`` statement
+    adds one level for everything nested inside it. The ``if`` of an
+    ``else if`` is treated as a continuation of the chain rather than as a
+    deeper level, so a flat ``if / else if / else if`` ladder has depth 1.
+    """
+    if_stmt = javalang.tree.IfStatement
+    nesting_types = (
+        if_stmt,
+        javalang.tree.ForStatement,
+        javalang.tree.WhileStatement,
+        javalang.tree.DoStatement,
+        javalang.tree.TryStatement,
+        javalang.tree.SwitchStatement,
+    )
+    max_depth = 0
+
+    def walk(node, depth=0, chained=False):
+        nonlocal max_depth
+        max_depth = max(max_depth, depth)
+        # `chained` marks the `if` of an `else if`: it stays on the level of
+        # the `if` it belongs to instead of opening a new one.
+        if isinstance(node, nesting_types) and not chained:
+            depth += 1
+        else_branch = node.else_statement if isinstance(node, if_stmt) else None
+        for child in getattr(node, 'children', []):
+            # Child attributes are either a single value or a list/tuple of them.
+            members = child if isinstance(child, (list, tuple)) else (child,)
+            for member in members:
+                if isinstance(member, javalang.ast.Node):
+                    is_else_if = member is else_branch and isinstance(member, if_stmt)
+                    walk(member, depth, is_else_if)
+
+    walk(javalang.parse.parse(code))
+    return max_depth
+
+
+def _process_lizard_result(lizard_result, source_code: str, filename: str, source_type: str = "file"):
+    """
+    Build the metrics dict for one file from a lizard result and its source.
+
+    Each entry of ``lizard_result.function_list`` becomes a per-function record
+    (name, line span, NLOC, cyclomatic complexity). The file-level summary
+    aggregates those records and adds the nesting depth returned by
+    ``calculate_nesting_depth(source_code)``.
+    """
+    functions = [
+        {
+            "name": fn.long_name,
+            "start_line": fn.start_line,
+            "end_line": fn.end_line,
+            "nloc": fn.nloc,  # SLOC (non-comment LOC) for the function
+            "cyclomatic_complexity": fn.cyclomatic_complexity,
+        }
+        for fn in lizard_result.function_list
+    ]
+    complexities = [entry["cyclomatic_complexity"] for entry in functions]
+
+    # A file without functions reports 0.0 / 0 rather than dividing by zero.
+    if complexities:
+        mean_cc = round(sum(complexities) / len(complexities), 2)
+    else:
+        mean_cc = 0.0
+
+    # Nesting depth comes from the parsed syntax tree of the whole source.
+    deepest_nesting = calculate_nesting_depth(source_code)
+
+    file_metrics = {
+        "file": filename,
+        "file_sloc_nloc": lizard_result.nloc,  # file-level SLOC (non-comment LOC)
+        "total_functions": len(functions),
+        "avg_cc": mean_cc,
+        "max_cc": max(complexities, default=0),
+        "max_nd_in_file": deepest_nesting,
+        "functions": functions,
+    }
+
+    # Only in-memory analyses are tagged; file analyses carry no such key.
+    if source_type == "string":
+        file_metrics["source_type"] = "string"
+    return file_metrics
+
+def analyze_file(path: Path):
+    """
+    Compute metrics for the Java file at ``path``.
+
+    The file text is read as UTF-8 for the nesting-depth pass, while lizard is
+    given the path and analyzes the file itself; the shared helper merges both
+    into a single metrics dict.
+    """
+    java_source = Path(path).read_text(encoding='utf-8')
+    file_name = str(path)
+    lizard_info = lizard.analyze_file(file_name)
+    return _process_lizard_result(lizard_info, java_source, file_name, "file")
+
+def analyze_source_code(source_code: str, filename: str = "AnalyzedCode.java"):
+    """
+    Compute metrics for Java source that is already held in memory.
+
+    ``filename`` is passed to lizard together with the text and is reported
+    under the "file" key; the result is tagged with source_type "string".
+    """
+    in_memory_info = lizard.analyze_file.analyze_source_code(filename, source_code)
+    return _process_lizard_result(in_memory_info, source_code, filename, "string")
+
+def compare_code_metrics(original_metrics, refactored_metrics, filename_prefix: str = "Code"):
+    """
+    Compare file-level metrics of the original and the refactored code.
+
+    Both arguments are metrics dicts holding the keys ``file_sloc_nloc``,
+    ``total_functions``, ``avg_cc``, ``max_cc`` and ``max_nd_in_file``; a
+    missing key raises ``KeyError``. ``filename_prefix`` is not used and is
+    kept only so existing calls keep working.
+
+    Returns ``{"improvements": {...}}``. Every delta is original minus
+    refactored (positive means the refactoring reduced the value), except
+    ``function_count_change``, which is refactored minus original.
+    """
+    def reduction(key):
+        return original_metrics[key] - refactored_metrics[key]
+
+    return {
+        "improvements": {
+            "sloc_reduction": reduction("file_sloc_nloc"),
+            "function_count_change": refactored_metrics["total_functions"] - original_metrics["total_functions"],
+            # Both averages are 2-decimal values; round their difference too so
+            # float noise (e.g. 0.19999999999999998) stays out of the report.
+            "avg_cc_improvement": round(reduction("avg_cc"), 2),
+            "max_cc_reduction": reduction("max_cc"),
+            "max_nesting_reduction": reduction("max_nd_in_file"),
+        }
+    }
+
+if __name__ == "__main__":
+    # CLI entry point: analyze one .java file, or every .java file below a
+    # directory, and write the collected per-file metrics to a JSON file.
+    import argparse
+    import sys
+
+    p = argparse.ArgumentParser()
+    p.add_argument("target", help="Path to a .java file or a directory")
+    p.add_argument("--output", "-o", help="Output JSON file path", default="metrics_results.json")
+    args = p.parse_args()
+
+    target = Path(args.target)
+    # Sorted so the order of entries in the output is stable between runs.
+    java_files = sorted(target.rglob("*.java")) if target.is_dir() else [target]
+
+    results = []
+    failed = []
+    for java_file in java_files:
+        # Top-level boundary: one unreadable or unparsable file must not throw
+        # away the metrics already gathered for the other files.
+        try:
+            results.append(analyze_file(java_file))
+        except Exception as exc:
+            failed.append(java_file)
+            print(f"Skipping {java_file}: {exc}", file=sys.stderr)
+
+    # Save results to JSON file
+    output_path = Path(args.output)
+    with open(output_path, 'w', encoding='utf-8') as f:
+        json.dump(results, f, indent=2)
+
+    print(f"Metrics analysis complete. Results saved to: {output_path}")
+    print(f"Analyzed {len(results)} file(s)")
+
+    # Print summary
+    if results:
+        total_functions = sum(r.get('total_functions', 0) for r in results)
+        # Mean of the per-file averages, not weighted by function count.
+        avg_complexity = sum(r.get('avg_cc', 0) for r in results) / len(results)
+        print(f"Total functions analyzed: {total_functions}")
+        print(f"Average complexity across files: {avg_complexity:.2f}")
+
+    # Keep a failing exit status when any file could not be analyzed.
+    if failed:
+        sys.exit(1)
diff --git a/AntiPattern_Remediator/workflow/results_manager.py b/AntiPattern_Remediator/workflow/results_manager.py
@@ -8,11 +8,49 @@
 from pathlib import Path
 from datetime import datetime
 from colorama import Fore, Style
+from .compute_metrics import analyze_source_code, compare_code_metrics
+
+
+def compute_code_metrics(final_state: dict) -> dict:
+    """
+    Compute metrics for the original and refactored code in ``final_state``.
+
+    Reads ``final_state['code']`` and ``final_state['refactored_code']``; a
+    missing or empty entry is skipped. The two sides are analyzed
+    independently, so a failure on one side is printed as a warning and does
+    not discard the metrics computed for the other.
+
+    Returns a dict with any of these keys:
+      - ``original_metrics`` / ``refactored_metrics``: file-level metrics
+        without the per-function list;
+      - ``improvements``: the comparison deltas, present only when both sides
+        were analyzed.
+    Errors are never raised to the caller; ``{}`` means nothing was computed.
+    """
+    def warn(error):
+        print(Fore.YELLOW + f"Warning: Could not compute metrics: {error}" + Style.RESET_ALL)
+
+    def analyze(state_key, java_name):
+        # Best-effort: any failure is reported as a warning, not propagated.
+        try:
+            source = final_state.get(state_key)
+            if not source:
+                return None
+            return analyze_source_code(source, java_name)
+        except Exception as e:
+            warn(e)
+            return None
+
+    original_metrics = analyze('code', "Original.java")
+    refactored_metrics = analyze('refactored_code', "Refactored.java")
+
+    metrics_data = {}
+    sides = (
+        ('original_metrics', original_metrics),
+        ('refactored_metrics', refactored_metrics),
+    )
+    for result_key, metrics in sides:
+        if metrics is not None:
+            # Remove functions list to keep it simple
+            metrics_data[result_key] = {k: v for k, v in metrics.items() if k != 'functions'}
+
+    # Compare if both exist; only the improvements are kept from the comparison.
+    if original_metrics is not None and refactored_metrics is not None:
+        try:
+            comparison = compare_code_metrics(original_metrics, refactored_metrics)
+            metrics_data['improvements'] = comparison['improvements']
+        except Exception as e:
+            warn(e)
+
+    return metrics_data
 
 
 def save_intermediate_results(file_path: str, final_state: dict, settings, results_dir: str = "../processing_results") -> bool:
     """Save intermediate results from the agentic workflow for analysis in markdown format."""
     try:
+        # Compute code metrics
+        metrics_data = compute_code_metrics(final_state)
+
         if file_path != 'java_code_snippet' and not None:
             # Create results directory if it doesn't exist
             results_path = Path(results_dir)
@@ -170,25 +208,79 @@ def save_intermediate_results(file_path: str, final_state: dict, settings, resul
             
             markdown_content += '</div>\n\n'
             
-            # Add code comparison summary
-            if original_code and refactored_code:
-                original_lines = len(original_code.splitlines())
-                refactored_lines = len(refactored_code.splitlines())
-                line_change = refactored_lines - original_lines
-                line_change_str = f"+{line_change}" if line_change > 0 else str(line_change)
-                
-                markdown_content += f"**Code Metrics:**\n"
-                markdown_content += f"- Original Loc: {original_lines}\n"
-                markdown_content += f"- Refactored Loc: {refactored_lines}\n"
-                markdown_content += f"- LoC change: {line_change_str}\n\n"
         else:
             markdown_content += "No code available for comparison.\n\n"
         
+        # Add code metrics section
+        if metrics_data:
+            markdown_content += "---\n\n## Code Metrics\n\n"
+            
+            # Display Original and Refactored metrics side by side
+            if 'original_metrics' in metrics_data and 'refactored_metrics' in metrics_data:
+                orig = metrics_data['original_metrics']
+                refac = metrics_data['refactored_metrics']
+
+                markdown_content += "### Original vs Refactored Code \n\n"
+                markdown_content += "| Metric | Original | Refactored |\n"
+                markdown_content += "|--------|----------|------------|\n"
+                markdown_content += f"| **Source Lines of Code (SLOC)** | {orig.get('file_sloc_nloc', 'N/A')} | {refac.get('file_sloc_nloc', 'N/A')} |\n"
+                markdown_content += f"| **Total Functions** | {orig.get('total_functions', 'N/A')} | {refac.get('total_functions', 'N/A')} |\n"
+                markdown_content += f"| **Average Cyclomatic Complexity** | {orig.get('avg_cc', 'N/A')} | {refac.get('avg_cc', 'N/A')} |\n"
+                markdown_content += f"| **Max Cyclomatic Complexity** | {orig.get('max_cc', 'N/A')} | {refac.get('max_cc', 'N/A')} |\n"
+                markdown_content += f"| **Max Nesting Depth** | {orig.get('max_nd_in_file', 'N/A')} | {refac.get('max_nd_in_file', 'N/A')} |\n\n"
+            
+            elif 'original_metrics' in metrics_data:
+                orig = metrics_data['original_metrics']
+                markdown_content += "### Original Code Metrics\n\n"
+                markdown_content += f"- **Source Lines of Code (SLOC):** {orig.get('file_sloc_nloc', 'N/A')}\n"
+                markdown_content += f"- **Total Functions:** {orig.get('total_functions', 'N/A')}\n"
+                markdown_content += f"- **Average Cyclomatic Complexity:** {orig.get('avg_cc', 'N/A')}\n"
+                markdown_content += f"- **Max Cyclomatic Complexity:** {orig.get('max_cc', 'N/A')}\n"
+                markdown_content += f"- **Max Nesting Depth:** {orig.get('max_nd_in_file', 'N/A')}\n\n"
+            
+            # Display Improvements
+            if 'improvements' in metrics_data:
+                imp = metrics_data['improvements']
+                markdown_content += "### Comparison\n\n"
+
+                sloc_change = imp.get('sloc_reduction', 0)
+                func_change = imp.get('function_count_change', 0)
+                avg_cc_imp = imp.get('avg_cc_improvement', 0)
+                max_cc_red = imp.get('max_cc_reduction', 0)
+                nest_red = imp.get('max_nesting_reduction', 0)
+                
+                markdown_content += f"- **Lines of Code Change:** {sloc_change:+d} lines\n"
+                markdown_content += f"- **Function Count Change:** {func_change:+d} functions\n"
+                markdown_content += f"- **Average Complexity Improvement:** {avg_cc_imp:+.2f}\n"
+                markdown_content += f"- **Max Complexity Reduction:** {max_cc_red:+d}\n"
+                markdown_content += f"- **Max Nesting Reduction:** {nest_red:+d}\n\n"
+        
         markdown_content += f"---\n\n*Generated by AntiPattern Remediator Tool using {settings.LLM_MODEL}*\n"        
         # Save to markdown file
         with open(results_file_path, 'w', encoding='utf-8') as f:
             f.write(markdown_content)
         
+        # Save metrics to JSON file if available
+        if metrics_data:
+            if file_path != 'java_code_snippet':
+                json_filename = f"{safe_filename}_metrics.json"
+                json_file_path = results_path / json_filename
+            else:
+                json_filename = "java_code_snippet_metrics.json"
+                json_file_path = json_filename
+            
+            # Include file path and timestamp in metrics JSON
+            metrics_with_metadata = {
+                "file_path": file_path,
+                "timestamp": timestamp,
+                "metrics": metrics_data
+            }
+            
+            with open(json_file_path, 'w', encoding='utf-8') as f:
+                json.dump(metrics_with_metadata, f, indent=2)
+            
+            print(Fore.CYAN + f"Code metrics saved: {json_file_path}" + Style.RESET_ALL)
+        
         print(Fore.CYAN + f"Intermediate results saved: {results_file_path}" + Style.RESET_ALL)
         return True
         
diff --git a/requirements.txt b/requirements.txt
@@ -48,6 +48,7 @@ idna==3.10
 importlib_metadata==8.7.0
 importlib_resources==6.5.2
 iniconfig==2.1.0
+javalang==0.13.0
 jaraco.classes==3.4.0
 jaraco.context==6.0.1
 jaraco.functools==4.2.1
@@ -70,6 +71,7 @@ langgraph-checkpoint==2.1.0
 langgraph-prebuilt==0.5.2
 langgraph-sdk==0.1.72
 langsmith==0.4.4
+lizard==1.17.31
 lomond==0.3.3
 markdown-it-py==3.0.0
 marshmallow==3.26.1