Skip to content

Commit dbf55a4

Browse files
committed
Fixed modelmanager issues
1 parent e93942c commit dbf55a4

File tree

5 files changed

+65
-14
lines changed

5 files changed

+65
-14
lines changed

locallab/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
LocalLab - A lightweight AI inference server for running LLMs locally
33
"""
44

5-
__version__ = "0.4.37"
5+
__version__ = "0.4.38"
66

77
# Only import what's necessary initially, lazy-load the rest
88
from .logger import get_logger

locallab/core/app.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -80,16 +80,25 @@ def init(backend, **kwargs):
8080
@app.on_event("startup")
8181
async def startup_event():
8282
"""Initialization tasks when the server starts"""
83-
logger.info("Starting LocalLab server...")
83+
logger.info(f"{Fore.CYAN}Starting LocalLab server...{Style.RESET_ALL}")
8484

8585
# Get HuggingFace token and set it in environment if available
8686
from ..config import get_hf_token
8787
hf_token = get_hf_token(interactive=False)
8888
if hf_token:
8989
os.environ["HUGGINGFACE_TOKEN"] = hf_token
90-
logger.info("HuggingFace token loaded from configuration")
90+
logger.info(f"{Fore.GREEN}HuggingFace token loaded from configuration{Style.RESET_ALL}")
9191
else:
92-
logger.warning("No HuggingFace token found. Some models may not be accessible.")
92+
logger.warning(f"{Fore.YELLOW}No HuggingFace token found. Some models may not be accessible.{Style.RESET_ALL}")
93+
94+
# Check if ngrok should be enabled
95+
from ..cli.config import get_config_value
96+
use_ngrok = get_config_value("use_ngrok", False)
97+
if use_ngrok:
98+
from ..utils.networking import setup_ngrok
99+
ngrok_url = await setup_ngrok(SERVER_PORT)
100+
if ngrok_url:
101+
logger.info(f"{Fore.GREEN}Ngrok tunnel established successfully{Style.RESET_ALL}")
93102

94103
# Initialize cache if available
95104
if FASTAPI_CACHE_AVAILABLE:
@@ -99,17 +108,14 @@ async def startup_event():
99108
logger.warning("FastAPICache not available, caching disabled")
100109

101110
# Check for model specified in environment variables or CLI config
102-
# Priority: HUGGINGFACE_MODEL > CLI config > DEFAULT_MODEL
103-
from ..cli.config import get_config_value
104-
105111
model_to_load = (
106112
os.environ.get("HUGGINGFACE_MODEL") or
107113
get_config_value("model_id") or
108114
DEFAULT_MODEL
109115
)
110116

111117
# Log model configuration
112-
logger.info(f"Model configuration:")
118+
logger.info(f"{Fore.CYAN}Model configuration:{Style.RESET_ALL}")
113119
logger.info(f" - Model to load: {model_to_load}")
114120
logger.info(f" - Quantization: {'Enabled - ' + os.environ.get('LOCALLAB_QUANTIZATION_TYPE', QUANTIZATION_TYPE) if os.environ.get('LOCALLAB_ENABLE_QUANTIZATION', '').lower() == 'true' else 'Disabled'}")
115121
logger.info(f" - Attention slicing: {'Enabled' if os.environ.get('LOCALLAB_ENABLE_ATTENTION_SLICING', '').lower() == 'true' else 'Disabled'}")

locallab/model_manager.py

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,41 @@ def _apply_optimizations(self, model: AutoModelForCausalLM) -> AutoModelForCausa
236236
logger.warning(f"Some optimizations could not be applied: {str(e)}")
237237
return model
238238

239+
async def _load_model_with_optimizations(self, model_id: str):
    """Load and optimize a model with all configured optimizations.

    Fetches the HuggingFace token non-interactively, loads the tokenizer and
    then the model for ``model_id``, applies the quantization kwargs from
    ``self._get_quantization_config()`` plus the extra tweaks from
    ``self._apply_optimizations()``, and finally puts the model in eval mode.
    Both ``self.tokenizer`` and ``self.model`` are (re)assigned as side effects.

    Args:
        model_id: HuggingFace model identifier (e.g. "org/name") to load.

    Returns:
        The loaded and optimized model (the same object stored on ``self.model``).

    Raises:
        Exception: any failure during token lookup, download, or optimization
            is logged and re-raised unchanged for the caller to handle.
    """
    try:
        # Get HF token (interactive=False: never prompt during server startup)
        from .config import get_hf_token
        hf_token = get_hf_token(interactive=False)

        # Apply quantization settings
        # NOTE(review): assumed to be a dict of from_pretrained() kwargs —
        # confirm against _get_quantization_config, which is not visible here
        quant_config = self._get_quantization_config()

        # Load tokenizer first (fails fast before the heavier model download)
        self.tokenizer = AutoTokenizer.from_pretrained(
            model_id,
            token=hf_token if hf_token else None
        )

        # Load model with optimizations
        self.model = AutoModelForCausalLM.from_pretrained(
            model_id,
            token=hf_token if hf_token else None,
            **quant_config
        )

        # Apply additional optimizations
        self.model = self._apply_optimizations(self.model)

        # Set model to evaluation mode (disables dropout/training behavior)
        self.model.eval()

        return self.model

    except Exception as e:
        # Log with context, then propagate so load_model can surface the error
        logger.error(f"Error loading model: {str(e)}")
        raise
273+
239274
async def load_model(self, model_id: str) -> None:
240275
"""Load a model but don't persist it to config"""
241276
if self._loading:
@@ -461,6 +496,7 @@ async def generate(
461496
raise HTTPException(
462497
status_code=500, detail=f"Generation failed: {str(e)}")
463498

499+
464500
def _stream_generate(
465501
self,
466502
inputs: Dict[str, torch.Tensor],
@@ -948,4 +984,4 @@ def unload_model(self) -> None:
948984
# Log model unloading
949985
log_model_unloaded(model_id)
950986

951-
logger.info(f"Model {model_id} unloaded successfully")
987+
logger.info(f"Model {model_id} unloaded successfully")

locallab/utils/networking.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import requests
99
from typing import Optional, Dict, List, Tuple
1010
from ..config import NGROK_TOKEN_ENV, get_ngrok_token, set_env_var
11+
from colorama import Fore, Style
1112

1213
logger = logging.getLogger(__name__)
1314

@@ -20,12 +21,13 @@ def setup_ngrok(port: int) -> Optional[str]:
2021
"""Setup ngrok tunnel for the given port"""
2122
try:
2223
from pyngrok import ngrok, conf
24+
from colorama import Fore, Style
2325

2426
# Get ngrok token using the standardized function
2527
auth_token = get_ngrok_token()
2628

2729
if not auth_token:
28-
logger.error("Ngrok auth token not found. Please configure it using 'locallab config'")
30+
logger.error(f"{Fore.RED}Ngrok auth token not found. Please configure it using 'locallab config'{Style.RESET_ALL}")
2931
return None
3032

3133
# Ensure token is properly set in environment
@@ -50,12 +52,19 @@ def setup_ngrok(port: int) -> Optional[str]:
5052
# Store the URL in environment for clients
5153
os.environ["LOCALLAB_NGROK_URL"] = public_url
5254

53-
logger.info(f"Ngrok tunnel established at: {public_url}")
55+
# Display banner
56+
logger.info(f"""
57+
{Fore.GREEN}┌────────────────────────────────────────────────────────────────┐{Style.RESET_ALL}
58+
{Fore.GREEN}│ NGROK TUNNEL ACTIVE │{Style.RESET_ALL}
59+
{Fore.GREEN}├────────────────────────────────────────────────────────────────┤{Style.RESET_ALL}
60+
{Fore.GREEN}{Style.RESET_ALL} Public URL: {Fore.CYAN}{public_url}{Style.RESET_ALL}
61+
{Fore.GREEN}└────────────────────────────────────────────────────────────────┘{Style.RESET_ALL}
62+
""")
5463
return public_url
5564

5665
except Exception as e:
57-
logger.error(f"Failed to setup ngrok: {str(e)}")
58-
logger.info("Please check your ngrok token using 'locallab config'")
66+
logger.error(f"{Fore.RED}Failed to setup ngrok: {str(e)}{Style.RESET_ALL}")
67+
logger.info(f"{Fore.YELLOW}Please check your ngrok token using 'locallab config'{Style.RESET_ALL}")
5968
return None
6069

6170
def get_network_interfaces() -> List[Dict[str, str]]:

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
setup(
77
name="locallab",
8-
version="0.4.37",
8+
version="0.4.38",
99
packages=find_packages(include=["locallab", "locallab.*"]),
1010
install_requires=[
1111
"fastapi>=0.95.0,<1.0.0",

0 commit comments

Comments
 (0)