Commit cdb7d61

Rationalise Call llm (#509)
* use aio llm client to support anthropic
* start of single
* change all the typed
* pretty up
* add debugging logs
* bump version
* fix test
* redefine once outputs
1 parent 5a65576 commit cdb7d61

15 files changed (+448 −339 lines)


patchwork/common/client/llm/aio.py

+2

@@ -8,6 +8,7 @@
 from typing_extensions import Dict, Iterable, List, Optional, Union
 
 from patchwork.common.client.llm.protocol import NOT_GIVEN, LlmClient, NotGiven
+from patchwork.logger import logger
 
 
 class AioLlmClient(LlmClient):
@@ -46,6 +47,7 @@ def chat_completion(
     ) -> ChatCompletion:
         for client in self.__clients:
            if client.is_model_supported(model):
+                logger.debug(f"Using {client.__class__.__name__} for model {model}")
                return client.chat_completion(
                    messages,
                    model,
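
Below is a minimal, runnable sketch of the first-match dispatch this hunk instruments. AioLlmClient's constructor is not part of this diff, so the list-of-clients argument and the fallback error here are assumptions for illustration:

    import logging

    logger = logging.getLogger(__name__)

    class FirstMatchDispatcher:
        """Sketch of AioLlmClient-style routing: the first client that claims the model wins."""

        def __init__(self, clients):
            # Assumed constructor shape; not shown in the diff above.
            self._clients = clients

        def chat_completion(self, messages, model):
            for client in self._clients:
                if client.is_model_supported(model):
                    # The debug line added by this commit: records which
                    # underlying client actually served the request.
                    logger.debug(f"Using {client.__class__.__name__} for model {model}")
                    return client.chat_completion(messages, model)
            raise ValueError(f"No configured client supports model {model!r}")

Without the debug line, a request routed to the wrong provider (for example, an OpenAI-compatible proxy that optimistically accepts every model name, as in the openai.py change below) is hard to diagnose; logging the winning client's class name makes the routing decision visible.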

patchwork/common/client/llm/google.py

+22 −17

@@ -1,7 +1,7 @@
 from __future__ import annotations
 
+import functools
 import time
-from functools import lru_cache
 
 from google.ai.generativelanguage_v1 import GenerateContentResponse
 from google.ai.generativelanguage_v1.services import generative_service, model_service
@@ -23,6 +23,25 @@
 from patchwork.common.client.llm.protocol import NOT_GIVEN, LlmClient, NotGiven
 
 
+@functools.lru_cache
+def _cached_list_model_from_google(api_key):
+    model_client = model_service.ModelServiceClient(
+        client_options=dict(
+            api_key=api_key,
+            # quota_project_id="",
+        )
+    )
+
+    request = ListModelsRequest()
+    response = model_client.list_models(request)
+
+    models = set()
+    for page in response.pages:
+        models.update(map(lambda x: x.name, page.models))
+
+    return models
+
+
 class GoogleLlmClient(LlmClient):
     __SAFETY_SETTINGS = [
         dict(category="HARM_CATEGORY_HATE_SPEECH", threshold="BLOCK_NONE"),
@@ -32,36 +51,22 @@ class GoogleLlmClient(LlmClient):
     ]
 
     def __init__(self, api_key: str):
-        self.model_client = model_service.ModelServiceClient(
-            client_options=dict(
-                api_key=api_key,
-                # quota_project_id="",
-            )
-        )
+        self.__api_key = api_key
         self.generative_client = generative_service.GenerativeServiceClient(
             client_options=dict(
                 api_key=api_key,
                 # quota_project_id="",
             )
         )
 
-    @lru_cache(maxsize=None)
     def __get_true_model_names(self) -> set[str]:
-        request = ListModelsRequest()
-        response = self.model_client.list_models(request)
-
-        models = set()
-        for page in response.pages:
-            models.update(map(lambda x: x.name, page.models))
-
-        return models
+        return _cached_list_model_from_google(self.__api_key)
 
     @staticmethod
     def __handle_model_name(model_name) -> str:
         _, _, model = model_name.rpartition("/")
         return model
 
-    @lru_cache(maxsize=None)
     def get_models(self) -> set[str]:
         models = self.__get_true_model_names()
         return set(map(self.__handle_model_name, models))
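
A note on why the cache moved: functools.lru_cache on an instance method keys the cache on self, which keeps every client instance alive for the lifetime of the process and means each new instance misses the cache even when constructed with an identical API key. Hoisting the lookup into a module-level function keyed on the API key lets all GoogleLlmClient instances with the same key share one cached model list. A self-contained sketch of the pattern, where the _fetch_model_names stub stands in for the real ListModelsRequest round trip and the model names are illustrative:

    import functools

    def _fetch_model_names(api_key: str) -> set[str]:
        # Stand-in for the network call; the real code pages through
        # ListModelsRequest responses.
        print(f"listing models for key ending ...{api_key[-4:]}")
        return {"models/gemini-pro", "models/gemini-1.5-flash"}

    @functools.lru_cache
    def _cached_list_models(api_key: str) -> frozenset[str]:
        # One network round trip per distinct api_key, shared process-wide.
        return frozenset(_fetch_model_names(api_key))

    class Client:
        def __init__(self, api_key: str):
            self.__api_key = api_key

        def get_models(self) -> set[str]:
            return set(_cached_list_models(self.__api_key))

Constructing two Client objects with the same key now triggers a single fetch; with the old method-level @lru_cache, each instance would have fetched independently and been pinned in the cache afterwards.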

patchwork/common/client/llm/openai.py

+27 −10

@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-from functools import lru_cache
+import functools
 
 from openai import OpenAI
 from openai.types.chat import (
@@ -13,22 +13,39 @@
 from patchwork.common.client.llm.protocol import NOT_GIVEN, LlmClient, NotGiven
 
 
+@functools.lru_cache
+def _cached_list_models_from_openai(api_key):
+    client = OpenAI(api_key=api_key)
+    sync_page = client.models.list()
+
+    models = set()
+    for pages in sync_page.iter_pages():
+        models.update(map(lambda x: x.id, pages.data))
+
+    return models
+
+
 class OpenAiLlmClient(LlmClient):
-    def __init__(self, api_key: str):
+    def __init__(self, api_key: str, base_url=None):
         self.api_key = api_key
-        self.client = OpenAI(api_key=api_key)
+        self.base_url = base_url
+        self.client = OpenAI(api_key=api_key, base_url=base_url)
 
-    @lru_cache(maxsize=None)
-    def get_models(self) -> set[str]:
-        sync_page = self.client.models.list()
+    def __is_not_openai_url(self):
+        # Some providers/apis only implement the chat completion endpoint.
+        # We mainly use this to skip using the model endpoints.
+        return self.base_url is not None and self.base_url != "https://api.openai.com/v1"
 
-        models = set()
-        for pages in sync_page.iter_pages():
-            models.update(map(lambda x: x.id, pages.data))
+    def get_models(self) -> set[str]:
+        if self.__is_not_openai_url():
+            return set()
 
-        return models
+        return _cached_list_models_from_openai(self.api_key)
 
     def is_model_supported(self, model: str) -> bool:
+        # might not implement model endpoint
+        if self.__is_not_openai_url():
+            return True
        return model in self.get_models()
 
     def chat_completion(
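
The practical effect of the new base_url parameter, assuming the class as patched above ("http://localhost:11434/v1" and "llama3" are illustrative stand-ins for an OpenAI-compatible endpoint, not something this commit configures):

    # Default: talks to api.openai.com; model support is checked against
    # the models endpoint (requires a real API key).
    openai_client = OpenAiLlmClient(api_key="sk-...")

    # Pointed at an OpenAI-compatible server that may not implement /models:
    local_client = OpenAiLlmClient(api_key="unused", base_url="http://localhost:11434/v1")
    local_client.get_models()                  # set(): model listing is skipped
    local_client.is_model_supported("llama3")  # True: any model name is accepted

The trade-off is deliberate: against a non-OpenAI base URL the client can no longer reject unknown model names up front, so a typo in the model name only surfaces when the chat completion request itself fails.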
