Skip to content

Commit dbf55a4

Browse files
committed
Fixed modelmanager issues
1 parent e93942c commit dbf55a4

File tree

5 files changed

+65
-14
lines changed

5 files changed

+65
-14
lines changed

locallab/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
LocalLab - A lightweight AI inference server for running LLMs locally
33
"""
44

5-
__version__ = "0.4.37"
5+
__version__ = "0.4.38"
66

77
# Only import what's necessary initially, lazy-load the rest
88
from .logger import get_logger

locallab/core/app.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -80,16 +80,25 @@ def init(backend, **kwargs):
8080
@app.on_event("startup")
8181
async def startup_event():
8282
"""Initialization tasks when the server starts"""
83-
logger.info("Starting LocalLab server...")
83+
logger.info(f"{Fore.CYAN}Starting LocalLab server...{Style.RESET_ALL}")
8484

8585
# Get HuggingFace token and set it in environment if available
8686
from ..config import get_hf_token
8787
hf_token = get_hf_token(interactive=False)
8888
if hf_token:
8989
os.environ["HUGGINGFACE_TOKEN"] = hf_token
90-
logger.info("HuggingFace token loaded from configuration")
90+
logger.info(f"{Fore.GREEN}HuggingFace token loaded from configuration{Style.RESET_ALL}")
9191
else:
92-
logger.warning("No HuggingFace token found. Some models may not be accessible.")
92+
logger.warning(f"{Fore.YELLOW}No HuggingFace token found. Some models may not be accessible.{Style.RESET_ALL}")
93+
94+
# Check if ngrok should be enabled
95+
from ..cli.config import get_config_value
96+
use_ngrok = get_config_value("use_ngrok", False)
97+
if use_ngrok:
98+
from ..utils.networking import setup_ngrok
99+
ngrok_url = await setup_ngrok(SERVER_PORT)
100+
if ngrok_url:
101+
logger.info(f"{Fore.GREEN}Ngrok tunnel established successfully{Style.RESET_ALL}")
93102

94103
# Initialize cache if available
95104
if FASTAPI_CACHE_AVAILABLE:
@@ -99,17 +108,14 @@ async def startup_event():
99108
logger.warning("FastAPICache not available, caching disabled")
100109

101110
# Check for model specified in environment variables or CLI config
102-
# Priority: HUGGINGFACE_MODEL > CLI config > DEFAULT_MODEL
103-
from ..cli.config import get_config_value
104-
105111
model_to_load = (
106112
os.environ.get("HUGGINGFACE_MODEL") or
107113
get_config_value("model_id") or
108114
DEFAULT_MODEL
109115
)
110116

111117
# Log model configuration
112-
logger.info(f"Model configuration:")
118+
logger.info(f"{Fore.CYAN}Model configuration:{Style.RESET_ALL}")
113119
logger.info(f" - Model to load: {model_to_load}")
114120
logger.info(f" - Quantization: {'Enabled - ' + os.environ.get('LOCALLAB_QUANTIZATION_TYPE', QUANTIZATION_TYPE) if os.environ.get('LOCALLAB_ENABLE_QUANTIZATION', '').lower() == 'true' else 'Disabled'}")
115121
logger.info(f" - Attention slicing: {'Enabled' if os.environ.get('LOCALLAB_ENABLE_ATTENTION_SLICING', '').lower() == 'true' else 'Disabled'}")

locallab/model_manager.py

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,41 @@ def _apply_optimizations(self, model: AutoModelForCausalLM) -> AutoModelForCausa
236236
logger.warning(f"Some optimizations could not be applied: {str(e)}")
237237
return model
238238

239+
async def _load_model_with_optimizations(self, model_id: str):
    """Load and optimize a model with all configured optimizations.

    Fetches the HuggingFace token non-interactively, loads the tokenizer and
    then the model for ``model_id``, applies the quantization kwargs from
    ``self._get_quantization_config()`` plus the extra tweaks from
    ``self._apply_optimizations()``, and finally puts the model in eval mode.
    Both ``self.tokenizer`` and ``self.model`` are (re)assigned as side effects.

    Args:
        model_id: HuggingFace model identifier (e.g. "org/name") to load.

    Returns:
        The loaded and optimized model (the same object stored on ``self.model``).

    Raises:
        Exception: any failure during token lookup, download, or optimization
            is logged and re-raised unchanged for the caller to handle.
    """
    try:
        # Get HF token (interactive=False: never prompt during server startup)
        from .config import get_hf_token
        hf_token = get_hf_token(interactive=False)

        # Apply quantization settings
        # NOTE(review): assumed to be a dict of from_pretrained() kwargs —
        # confirm against _get_quantization_config, which is not visible here
        quant_config = self._get_quantization_config()

        # Load tokenizer first (fails fast before the heavier model download)
        self.tokenizer = AutoTokenizer.from_pretrained(
            model_id,
            token=hf_token if hf_token else None
        )

        # Load model with optimizations
        self.model = AutoModelForCausalLM.from_pretrained(
            model_id,
            token=hf_token if hf_token else None,
            **quant_config
        )

        # Apply additional optimizations
        self.model = self._apply_optimizations(self.model)

        # Set model to evaluation mode (disables dropout/training behavior)
        self.model.eval()

        return self.model

    except Exception as e:
        # Log with context, then propagate so load_model can surface the error
        logger.error(f"Error loading model: {str(e)}")
        raise
273+
239274
async def load_model(self, model_id: str) -> None:
240275
"""Load a model but don't persist it to config"""
241276
if self._loading:
@@ -461,6 +496,7 @@ async def generate(
461496
raise HTTPException(
462497
status_code=500, detail=f"Generation failed: {str(e)}")
463498

499+
464500
def _stream_generate(
465501
self,
466502
inputs: Dict[str, torch.Tensor],
@@ -948,4 +984,4 @@ def unload_model(self) -> None:
948984
# Log model unloading
949985
log_model_unloaded(model_id)
950986

951-
logger.info(f"Model {model_id} unloaded successfully")
987+
logger.info(f"Model {model_id} unloaded successfully")

locallab/utils/networking.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import requests
99
from typing import Optional, Dict, List, Tuple
1010
from ..config import NGROK_TOKEN_ENV, get_ngrok_token, set_env_var
11+
from colorama import Fore, Style
1112

1213
logger = logging.getLogger(__name__)
1314

@@ -20,12 +21,13 @@ def setup_ngrok(port: int) -> Optional[str]:
2021
"""Setup ngrok tunnel for the given port"""
2122
try:
2223
from pyngrok import ngrok, conf
24+
from colorama import Fore, Style
2325

2426
# Get ngrok token using the standardized function
2527
auth_token = get_ngrok_token()
2628

2729
if not auth_token:
28-
logger.error("Ngrok auth token not found. Please configure it using 'locallab config'")
30+
logger.error(f"{Fore.RED}Ngrok auth token not found. Please configure it using 'locallab config'{Style.RESET_ALL}")
2931
return None
3032

3133
# Ensure token is properly set in environment
@@ -50,12 +52,19 @@ def setup_ngrok(port: int) -> Optional[str]:
5052
# Store the URL in environment for clients
5153
os.environ["LOCALLAB_NGROK_URL"] = public_url
5254

53-
logger.info(f"Ngrok tunnel established at: {public_url}")
55+
# Display banner
56+
logger.info(f"""
57+
{Fore.GREEN}┌────────────────────────────────────────────────────────────────┐{Style.RESET_ALL}
58+
{Fore.GREEN}│ NGROK TUNNEL ACTIVE │{Style.RESET_ALL}
59+
{Fore.GREEN}├────────────────────────────────────────────────────────────────┤{Style.RESET_ALL}
60+
{Fore.GREEN}{Style.RESET_ALL} Public URL: {Fore.CYAN}{public_url}{Style.RESET_ALL}
61+
{Fore.GREEN}└────────────────────────────────────────────────────────────────┘{Style.RESET_ALL}
62+
""")
5463
return public_url
5564

5665
except Exception as e:
57-
logger.error(f"Failed to setup ngrok: {str(e)}")
58-
logger.info("Please check your ngrok token using 'locallab config'")
66+
logger.error(f"{Fore.RED}Failed to setup ngrok: {str(e)}{Style.RESET_ALL}")
67+
logger.info(f"{Fore.YELLOW}Please check your ngrok token using 'locallab config'{Style.RESET_ALL}")
5968
return None
6069

6170
def get_network_interfaces() -> List[Dict[str, str]]:

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
setup(
77
name="locallab",
8-
version="0.4.37",
8+
version="0.4.38",
99
packages=find_packages(include=["locallab", "locallab.*"]),
1010
install_requires=[
1111
"fastapi>=0.95.0,<1.0.0",

0 commit comments

Comments
 (0)