
Commit 1df9278

Improve HF token usage
1 parent b3f0b0e commit 1df9278

File tree

6 files changed: +85 additions, -529 deletions

docs/guides/troubleshooting.md

Lines changed: 21 additions & 0 deletions

````diff
@@ -44,6 +44,27 @@ os.environ["LOCALLAB_QUANTIZATION_TYPE"] = "int8"
 os.environ["HUGGINGFACE_MODEL"] = "microsoft/phi-2" # Smaller model
 ```
 
+### Authentication Issues
+
+**Issue:** HuggingFace Authentication Error
+```
+ERROR: Failed to load model: Invalid credentials in Authorization header
+```
+**Solution:**
+1. Get a HuggingFace token from [HuggingFace tokens page](https://huggingface.co/settings/tokens)
+2. Set the token in one of these ways:
+```python
+# Option 1: Environment variable
+os.environ["HUGGINGFACE_TOKEN"] = "your_token_here"
+
+# Option 2: Configuration file
+from locallab.cli.config import set_config_value
+set_config_value("huggingface_token", "your_token_here")
+```
+3. Restart the LocalLab server
+
+Note: Some models like microsoft/phi-2 require authentication to download.
+
 ## Related Documentation
 - [Getting Started](./getting-started.md)
 - [Performance Guide](./features/performance.md)
````
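A quick way for readers of the new troubleshooting section to confirm a token is actually accepted, before restarting the server, is to ask the Hub who it authenticates as. This snippet is not part of the commit; it is a minimal sketch that assumes the `huggingface_hub` package is installed in the environment.

```python
# Sketch only (not part of this commit): verify a HuggingFace token by calling
# the Hub's whoami endpoint, which raises if the credentials are rejected.
# Assumes huggingface_hub is installed.
import os

from huggingface_hub import HfApi

token = os.environ.get("HUGGINGFACE_TOKEN")  # set via either option above
try:
    user = HfApi().whoami(token=token)  # raises on a missing or invalid token
    print(f"Token OK, authenticated as: {user['name']}")
except Exception as exc:
    print(f"Token check failed: {exc}")
```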

locallab/cli/interactive.py

Lines changed: 22 additions & 9 deletions

```diff
@@ -42,15 +42,6 @@ def get_missing_required_env_vars() -> List[str]:
 def prompt_for_config(use_ngrok: bool = None, port: int = None, ngrok_auth_token: str = None, force_reconfigure: bool = False) -> Dict[str, Any]:
     """
     Interactive prompt for configuration
-
-    Args:
-        use_ngrok: Whether to use ngrok
-        port: Port to run the server on
-        ngrok_auth_token: Ngrok authentication token
-        force_reconfigure: Whether to force reconfiguration of all settings
-
-    Returns:
-        Dict of configuration values
     """
     # Import here to avoid circular imports
     from .config import load_config, get_config_value
@@ -72,6 +63,28 @@ def prompt_for_config(use_ngrok: bool = None, port: int = None, ngrok_auth_token
     # Determine if we're in Colab
     in_colab = is_in_colab()
 
+    # If in Colab, use simplified configuration
+    if in_colab:
+        # Set default values for Colab environment
+        config.setdefault("port", 8000)
+        config.setdefault("use_ngrok", True)
+        config.setdefault("model_id", os.environ.get("HUGGINGFACE_MODEL", DEFAULT_MODEL))
+
+        # Use ngrok token from environment if available
+        if os.environ.get("NGROK_AUTH_TOKEN"):
+            config["ngrok_auth_token"] = os.environ.get("NGROK_AUTH_TOKEN")
+        elif ngrok_auth_token:
+            config["ngrok_auth_token"] = ngrok_auth_token
+
+        # Set some reasonable defaults for Colab
+        config.setdefault("enable_quantization", True)
+        config.setdefault("quantization_type", "int8")
+        config.setdefault("enable_attention_slicing", True)
+        config.setdefault("enable_flash_attention", True)
+        config.setdefault("enable_better_transformer", True)
+
+        return config
+
     # Check for GPU
     has_gpu = False
     gpu_memory = get_gpu_memory()
```
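The Colab branch above relies on `dict.setdefault`, which only fills in keys that are missing, so anything already present in the loaded config dict survives the Colab defaults. The short illustration below uses made-up values and is not taken from the commit.

```python
# Illustrative only: why the Colab branch uses setdefault instead of assignment.
# Keys already in the config keep their values; only missing keys get defaults.
config = {"port": 9090, "model_id": "microsoft/phi-2"}  # e.g. previously saved settings

config.setdefault("port", 8000)            # no effect: port was already set
config.setdefault("use_ngrok", True)       # added: key was missing
config.setdefault("quantization_type", "int8")

print(config)
# {'port': 9090, 'model_id': 'microsoft/phi-2', 'use_ngrok': True, 'quantization_type': 'int8'}
```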

locallab/core/app.py

Lines changed: 22 additions & 1 deletion

```diff
@@ -81,6 +81,15 @@ async def startup_event():
     """Initialization tasks when the server starts"""
     logger.info("Starting LocalLab server...")
 
+    # Get HuggingFace token and set it in environment if available
+    from ..config import get_hf_token
+    hf_token = get_hf_token(interactive=False)
+    if hf_token:
+        os.environ["HUGGINGFACE_TOKEN"] = hf_token
+        logger.info("HuggingFace token loaded from configuration")
+    else:
+        logger.warning("No HuggingFace token found. Some models may not be accessible.")
+
     # Initialize cache if available
     if FASTAPI_CACHE_AVAILABLE:
         FastAPICache.init(InMemoryBackend(), prefix="locallab-cache")
@@ -182,6 +191,15 @@ async def load_model_in_background(model_id: str):
     start_time = time.time()
 
     try:
+        # Ensure HF token is set before loading model
+        from ..config import get_hf_token
+        hf_token = get_hf_token(interactive=False)
+        if hf_token:
+            os.environ["HUGGINGFACE_TOKEN"] = hf_token
+            logger.debug("Using HuggingFace token from configuration")
+        else:
+            logger.warning("No HuggingFace token found. Some models may not be accessible.")
+
         # Wait for the model to load
         await model_manager.load_model(model_id)
 
@@ -191,4 +209,7 @@ async def load_model_in_background(model_id: str):
         # We don't need to call log_model_loaded here since it's already done in the model_manager
         logger.info(f"{Fore.GREEN}Model {model_id} loaded successfully in {load_time:.2f} seconds!{Style.RESET_ALL}")
     except Exception as e:
-        logger.error(f"Failed to load model {model_id}: {str(e)}")
+        logger.error(f"Failed to load model {model_id}: {str(e)}")
+        if "401 Client Error: Unauthorized" in str(e):
+            logger.error("This appears to be an authentication error. Please ensure your HuggingFace token is set correctly.")
+            logger.info("You can set your token using: locallab config")
```
