Commit b5a6c86

Add --dry-run flag to CLI and dry_run option to API (#61)
* Add dry-run feature
* Add verbose file-path listing and remove redundant main guard
* Remove unnecessary block
1 parent e8fc031 commit b5a6c86

5 files changed (+143, -15 lines)
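
For orientation, here is a minimal sketch of how the new dry_run option can be driven from the Python API; the service wiring and DownloadRequest fields mirror the new test at the bottom of this commit, while the preview() helper, the ./out destination, and the repo_info / git_ref arguments are illustrative placeholders rather than forklet's documented interface. From the CLI, the same behaviour is exposed through the new --dry-run / -n flag on the download command.

# Sketch only: mirrors the wiring used in the new test, not a verbatim example.
from pathlib import Path

from forklet.core.orchestrator import DownloadOrchestrator
from forklet.services.github_api import GitHubAPIService
from forklet.services.download import DownloadService
from forklet.infrastructure.retry_manager import RetryManager
from forklet.infrastructure.rate_limiter import RateLimiter
from forklet.models.download import DownloadRequest, FilterCriteria


async def preview(repo_info, git_ref) -> None:
    # Build the orchestrator the same way the new test does.
    retry_manager = RetryManager()
    orchestrator = DownloadOrchestrator(
        GitHubAPIService(RateLimiter(), retry_manager),
        DownloadService(retry_manager),
    )
    request = DownloadRequest(
        repository=repo_info,        # a RepositoryInfo instance
        git_ref=git_ref,             # a GitReference instance
        destination=Path("./out"),   # placeholder destination
        strategy=None,
        filters=FilterCriteria(),
        dry_run=True,                # preview only: nothing is written to disk
    )
    result = await orchestrator.execute_download(request)
    print(f"{len(result.matched_files)} matched, "
          f"{len(result.skipped_files)} would be skipped")

Being a coroutine, preview() is driven with asyncio.run(), just as the CLI's run_download() is below.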

forklet/__main__.py

Lines changed: 8 additions & 2 deletions
@@ -47,6 +47,7 @@ def cli(ctx, verbose: bool, token: Optional[str]):
     help='Download strategy')
 @click.option('--concurrent', '-c', default=5, help='Concurrent downloads')
 @click.option('--overwrite', '-f', is_flag=True, help='Overwrite existing files')
+@click.option('--dry-run', '-n', is_flag=True, help='Preview files to download without writing')
 @click.pass_context
 def download(
     ctx,
@@ -65,7 +66,8 @@ def download(
     strategy: str,
     concurrent: int,
     overwrite: bool,
-    no_progress: bool
+    no_progress: bool,
+    dry_run: bool
 ):
     """
     Download files from a GitHub repository.
@@ -104,7 +106,9 @@ async def run_download():
             token = token,
             concurrent = concurrent,
             overwrite = overwrite,
-            progress = not no_progress
+            progress = not no_progress,
+            dry_run = dry_run,
+            verbose = ctx.obj.get('verbose', False)
         )
 
     asyncio.run(run_download())
@@ -192,3 +196,5 @@ def version():
 #### MAIN ENTRYPOINT FOR THE FORKLET CLI
 def main():
     cli()
+
+
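
A side note on the flag mechanics, as a generic click illustration rather than forklet code: is_flag=True makes the option a boolean parameter, so dry_run arrives as True only when --dry-run or -n is passed.

# Generic click demo of an is_flag option; command and option names are illustrative.
import click
from click.testing import CliRunner


@click.command()
@click.option('--dry-run', '-n', is_flag=True, help='Preview only')
def demo(dry_run: bool) -> None:
    click.echo(f"dry_run={dry_run}")


runner = CliRunner()
print(runner.invoke(demo, ['--dry-run']).output)  # dry_run=True
print(runner.invoke(demo, []).output)             # dry_run=False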

forklet/core/orchestrator.py

Lines changed: 31 additions & 7 deletions
@@ -144,19 +144,43 @@ async def execute_download(self, request: DownloadRequest) -> DownloadResult:
             f"Filtered {filter_result.filtered_files}/{filter_result.total_files} "
             "files for download"
         )
-
-        # Prepare destination
-        if request.create_destination:
-            await self.download_service.ensure_directory(request.destination)
-
-        # Create download result and set as current
+
+        # Create download result and set as current (so control operations can act)
         result = DownloadResult(
             request=request,
             status=DownloadStatus.IN_PROGRESS,
             progress=progress,
             started_at=datetime.now()
         )
+        # Expose matched file paths for verbose reporting
+        result.matched_files = [f.path for f in target_files]
         self._current_result = result
+
+        # If dry-run is explicitly requested, prepare a summary and return without writing files
+        if getattr(request, 'dry_run', None) is True:
+            # Determine which files would be skipped due to existing local files
+            skipped = []
+            for f in target_files:
+                if request.preserve_structure:
+                    target_path = request.destination / f.path
+                else:
+                    target_path = request.destination / Path(f.path).name
+                if target_path.exists() and not request.overwrite_existing:
+                    skipped.append(f.path)
+
+            # Update and return the result summarizing what would happen
+            result.status = DownloadStatus.COMPLETED
+            result.downloaded_files = []
+            result.skipped_files = skipped
+            result.failed_files = {}
+            result.completed_at = datetime.now()
+            # matched_files already set above; keep it for verbose output
+            logger.info(f"Dry-run: {len(target_files)} files matched, {len(skipped)} would be skipped")
+            return result
+
+        # Prepare destination
+        if request.create_destination:
+            await self.download_service.ensure_directory(request.destination)
 
         # Reset state tracking
         self._completed_files.clear()
@@ -173,7 +197,7 @@ async def execute_download(self, request: DownloadRequest) -> DownloadResult:
         result.failed_files = failed_files
         result.cache_hits = stats.cache_hits
         result.api_calls_made = stats.api_calls
-
+
         # Mark as completed
         stats.end_time = datetime.now()
         result.mark_completed()
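
To read the dry-run branch above in isolation, here is a self-contained restatement of its skip detection using plain pathlib; the function name is ours, and preserve_structure / overwrite_existing stand in for the corresponding DownloadRequest fields.

# Sketch of the skip check performed by the dry-run branch in execute_download().
from pathlib import Path
from typing import List


def preview_skips(
    destination: Path,
    file_paths: List[str],
    preserve_structure: bool = True,
    overwrite_existing: bool = False,
) -> List[str]:
    """Return the repository paths a dry run would report as skipped."""
    skipped = []
    for path in file_paths:
        # Same target-path computation as the orchestrator: keep the repo
        # layout, or flatten to the bare file name.
        target = destination / path if preserve_structure else destination / Path(path).name
        if target.exists() and not overwrite_existing:
            skipped.append(path)
    return skipped


# Example: if ./out/README.md already exists and overwrite is off,
# preview_skips(Path("./out"), ["src/main.py", "README.md"]) returns ["README.md"].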

forklet/interfaces/cli.py

Lines changed: 28 additions & 6 deletions
@@ -126,7 +126,9 @@ async def execute_download(
         token: Optional[str],
         concurrent: int,
         overwrite: bool,
-        progress: bool = True
+        progress: bool = True,
+        dry_run: bool = False,
+        verbose: bool = False,
     ) -> None:
         """
         Execute the download operation.
@@ -168,16 +170,17 @@ async def execute_download(
                 max_concurrent_downloads = concurrent,
                 overwrite_existing = overwrite,
                 show_progress_bars = progress
+                ,dry_run = dry_run
             )
 
             # Execute download
             click.echo(
                 f"🚀 Starting download with {concurrent} concurrent workers..."
             )
             result = await self.orchestrator.execute_download(request)
-
-            # Display results
-            self.display_results(result)
+
+            # Display results (pass through verbose flag)
+            self.display_results(result, verbose=verbose)
 
         except (
             RateLimitError, AuthenticationError,
@@ -191,7 +194,7 @@ async def execute_download(
             logger.exception("Unexpected error in download operation")
             sys.exit(1)
 
-    def display_results(self, result: DownloadResult) -> None:
+    def display_results(self, result: DownloadResult, verbose: bool = False) -> None:
         """
         Display download results in a user-friendly format.
@@ -206,9 +209,28 @@ def display_results(self, result: DownloadResult) -> None:
 
             if result.average_speed is not None:
                 click.echo(f"   ⚡ Speed: {result.average_speed:.2f} bytes/sec")
-
+
             if result.skipped_files:
                 click.echo(f"   ⏭️  Skipped: {len(result.skipped_files)} files")
+
+            # When verbose, display file paths (matched / downloaded / skipped)
+            if verbose:
+                # Matched files (available in dry-run and set by orchestrator)
+                if hasattr(result, 'matched_files') and result.matched_files:
+                    click.echo("   🔎 Matched files:")
+                    for p in result.matched_files:
+                        click.echo(f"      {p}")
+
+                # For completed runs, show downloaded and skipped paths
+                if result.downloaded_files:
+                    click.echo("   📥 Downloaded paths:")
+                    for p in result.downloaded_files:
+                        click.echo(f"      {p}")
+
+                if result.skipped_files:
+                    click.echo("   ⏭️  Skipped paths:")
+                    for p in result.skipped_files:
+                        click.echo(f"      {p}")
 
         elif hasattr(result, 'failed_files') and result.failed_files:
             click.echo("⚠️  Download completed with errors:")

forklet/models/download.py

Lines changed: 5 additions & 0 deletions
@@ -106,6 +106,9 @@ class DownloadRequest:
     # Authentication
     token: Optional[str] = None
 
+    # Dry-run preview mode (do not write files)
+    dry_run: bool = False
+
     # Metadata
     request_id: str = field(default_factory=lambda: f"req_{datetime.now().strftime('%Y%m%d_%H%M%S')}")
     created_at: datetime = field(default_factory=datetime.now)
@@ -189,6 +192,8 @@ class DownloadResult:
     downloaded_files: List[str] = field(default_factory=list)
     skipped_files: List[str] = field(default_factory=list)
     failed_files: Dict[str, str] = field(default_factory=dict)
+    # Matched file paths (populated by orchestrator for verbose reporting)
+    matched_files: List[str] = field(default_factory=list)
 
     # Metadata
     started_at: datetime = field(default_factory=datetime.now)
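
Both additions are backwards compatible: dry_run defaults to False on DownloadRequest and matched_files defaults to an empty list on DownloadResult, so existing construction sites are unaffected.
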
Lines changed: 71 additions & 0 deletions
@@ -0,0 +1,71 @@
+import asyncio
+import pytest
+from pathlib import Path
+from datetime import datetime
+
+from forklet.core.orchestrator import DownloadOrchestrator
+from forklet.services.github_api import GitHubAPIService
+from forklet.services.download import DownloadService
+from forklet.infrastructure.retry_manager import RetryManager
+from forklet.infrastructure.rate_limiter import RateLimiter
+from forklet.models.github import GitHubFile
+from forklet.models.download import DownloadRequest, FilterCriteria
+from forklet.models.github import RepositoryInfo, GitReference, RepositoryType
+
+
+@pytest.mark.asyncio
+async def test_orchestrator_dry_run(tmp_path, monkeypatch):
+    # Arrange: create mock files returned by GitHub API
+    files = [
+        GitHubFile(path="src/main.py", type="blob", size=100, download_url="https://api.github.com/file1"),
+        GitHubFile(path="README.md", type="blob", size=50, download_url="https://api.github.com/file2"),
+    ]
+
+    async def mock_get_repository_tree(owner, repo, ref):
+        return files
+
+    # Setup services
+    rate_limiter = RateLimiter()
+    retry_manager = RetryManager()
+    github_service = GitHubAPIService(rate_limiter, retry_manager)
+    download_service = DownloadService(retry_manager)
+
+    # Monkeypatch the github_service.get_repository_tree to return our files
+    monkeypatch.setattr(github_service, 'get_repository_tree', mock_get_repository_tree)
+
+    orchestrator = DownloadOrchestrator(github_service, download_service)
+
+    # Create a fake repository and ref
+    repo = RepositoryInfo(
+        owner='test', name='repo', full_name='test/repo', url='https://github.com/test/repo',
+        default_branch='main', repo_type=RepositoryType.PUBLIC, size=1,
+        is_private=False, is_fork=False, created_at=datetime.now(), updated_at=datetime.now()
+    )
+    ref = GitReference(name='main', ref_type='branch', sha='abc')
+
+    # Create destination and create one existing file to test skipped detection
+    dest = tmp_path / "out"
+    dest.mkdir()
+    existing = dest / "README.md"
+    existing.write_text("existing")
+
+    request = DownloadRequest(
+        repository=repo,
+        git_ref=ref,
+        destination=dest,
+        strategy=None,
+        filters=FilterCriteria(),
+        dry_run=True
+    )
+
+    # Act
+    result = await orchestrator.execute_download(request)
+
+    # Assert
+    assert result is not None
+    assert result.progress.total_files == 2
+    assert result.progress.total_bytes == 150
+    # No files should be downloaded in dry-run
+    assert result.downloaded_files == []
+    # README.md should be reported as skipped
+    assert "README.md" in result.skipped_files
