Skip to content

[WIP][AQUA] GPU Shape Recommendation #1221

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
12 changes: 7 additions & 5 deletions ads/aqua/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,18 +96,20 @@ def _validate_value(flag, value):
"If you intend to chain a function call to the result, please separate the "
"flag and the subsequent function call with separator `-`."
)

@staticmethod
def install():
    """Install the ADS Aqua Extension from a wheel file.

    Set the environment variable `AQUA_EXTENSTION_PATH` to change the
    wheel file path (note: the variable name is intentionally spelled
    this way; it is the name the code reads).

    Returns
    -------
    int:
        Installation status (the `pip install` process return code;
        0 on success).
    """
    import subprocess

    wheel_file_path = os.environ.get(
        "AQUA_EXTENSTION_PATH", "/ads/extension/adsjupyterlab_aqua_extension*.whl"
    )
    # shell=True is required so the glob in the default path expands;
    # NOTE(review): the path comes from an env var — confirm it is trusted.
    status = subprocess.run(
        f"pip install {wheel_file_path}", shell=True, check=False
    )
    # Bug fix: previously returned the unbound `check_returncode` method
    # object instead of a status. Return the integer return code, as the
    # docstring promises.
    return status.returncode
21 changes: 21 additions & 0 deletions ads/aqua/common/entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,17 @@ class Config:
arbitrary_types_allowed = True
protected_namespaces = ()

class ComputeRank(Serializable):
    """
    Represents the relative cost and performance ranking for a compute shape.
    """

    # Annotated Optional[int] (was `int` with a None default, which
    # contradicts the annotation); default passed by keyword for clarity.
    cost: Optional[int] = Field(
        default=None,
        description="The relative rank of the cost of the shape. Range is [10 (cost-effective), 100 (most-expensive)]",
    )

    performance: Optional[int] = Field(
        default=None,
        description="The relative rank of the performance of the shape. Range is [10 (lower performance), 110 (highest performance)]",
    )

class GPUSpecs(Serializable):
"""
Expand All @@ -61,6 +72,12 @@ class GPUSpecs(Serializable):
gpu_type: Optional[str] = Field(
default=None, description="The type of GPU (e.g., 'V100, A100, H100')."
)
quantization: Optional[List[str]] = Field(
default_factory=list, description="The quantization format supported by shape. (ex. bitsandbytes, fp8, etc.)"
)
ranking: Optional[ComputeRank] = Field(
None, description="The relative rank of the cost and performance of the shape."
)


class GPUShapesIndex(Serializable):
Expand All @@ -84,6 +101,10 @@ class ComputeShapeSummary(Serializable):
including CPU, memory, and optional GPU characteristics.
"""

available: Optional[bool] = Field(
default = False,
description="True if shape is available on user tenancy, "
)
core_count: Optional[int] = Field(
default=None,
description="Total number of CPU cores available for the compute shape.",
Expand Down
5 changes: 5 additions & 0 deletions ads/aqua/common/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,11 @@ class AquaValueError(AquaError, ValueError):
def __init__(self, reason, status=403, service_payload=None):
super().__init__(reason, status, service_payload)

class AquaRecommendationError(AquaError):
    """Raised when a model is incompatible with the shape recommendation tool."""

    def __init__(self, reason, status=400, service_payload=None):
        # Delegate to AquaError; 400 (bad request) is the default HTTP status
        # because the failure originates from the caller-supplied model.
        super().__init__(reason, status, service_payload)

class AquaFileNotFoundError(AquaError, FileNotFoundError):
"""Exception raised for missing target file."""
Expand Down
56 changes: 49 additions & 7 deletions ads/aqua/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1229,10 +1229,10 @@ def load_gpu_shapes_index(
auth: Optional[Dict[str, Any]] = None,
) -> GPUShapesIndex:
"""
Load the GPU shapes index, preferring the OS bucket copy over the local one.
Load the GPU shapes index, merging based on freshness.
Attempts to read `gpu_shapes_index.json` from OCI Object Storage first;
if that succeeds, those entries will override the local defaults.
Compares last-modified timestamps of local and remote files,
merging the shapes from the fresher file on top of the older one.
Parameters
----------
Expand All @@ -1253,7 +1253,9 @@ def load_gpu_shapes_index(
file_name = "gpu_shapes_index.json"

# Try remote load
remote_data: Dict[str, Any] = {}
local_data, remote_data = {}, {}
local_mtime, remote_mtime = None, None

if CONDA_BUCKET_NS:
try:
auth = auth or authutil.default_signer()
Expand All @@ -1263,8 +1265,24 @@ def load_gpu_shapes_index(
logger.debug(
"Loading GPU shapes index from Object Storage: %s", storage_path
)
with fsspec.open(storage_path, mode="r", **auth) as f:

fs = fsspec.filesystem("oci", **auth)
with fs.open(storage_path, mode="r") as f:
remote_data = json.load(f)

remote_info = fs.info(storage_path)
remote_mtime_str = remote_info.get("timeModified", None)
if remote_mtime_str:
# Convert OCI timestamp (e.g., 'Mon, 04 Aug 2025 06:37:13 GMT') to epoch time
remote_mtime = datetime.strptime(
remote_mtime_str, "%a, %d %b %Y %H:%M:%S %Z"
).timestamp()

logger.debug(
"Remote GPU shapes last-modified time: %s",
datetime.fromtimestamp(remote_mtime).strftime("%Y-%m-%d %H:%M:%S"),
)

logger.debug(
"Loaded %d shapes from Object Storage",
len(remote_data.get("shapes", {})),
Expand All @@ -1273,12 +1291,19 @@ def load_gpu_shapes_index(
logger.debug("Remote load failed (%s); falling back to local", ex)

# Load local copy
local_data: Dict[str, Any] = {}
local_path = os.path.join(os.path.dirname(__file__), "../resources", file_name)
try:
logger.debug("Loading GPU shapes index from local file: %s", local_path)
with open(local_path) as f:
local_data = json.load(f)

local_mtime = os.path.getmtime(local_path)

logger.debug(
"Local GPU shapes last-modified time: %s",
datetime.fromtimestamp(local_mtime).strftime("%Y-%m-%d %H:%M:%S"),
)

logger.debug(
"Loaded %d shapes from local file", len(local_data.get("shapes", {}))
)
Expand All @@ -1288,7 +1313,24 @@ def load_gpu_shapes_index(
# Merge: remote shapes override local
local_shapes = local_data.get("shapes", {})
remote_shapes = remote_data.get("shapes", {})
merged_shapes = {**local_shapes, **remote_shapes}
merged_shapes = {}

if local_mtime and remote_mtime:
if remote_mtime >= local_mtime:
logger.debug("Remote data is fresher or equal; merging remote over local.")
merged_shapes = {**local_shapes, **remote_shapes}
else:
logger.debug("Local data is fresher; merging local over remote.")
merged_shapes = {**remote_shapes, **local_shapes}
elif remote_shapes:
logger.debug("Only remote shapes available.")
merged_shapes = remote_shapes
elif local_shapes:
logger.debug("Only local shapes available.")
merged_shapes = local_shapes
else:
logger.error("No GPU shapes data found in either source.")
merged_shapes = {}

return GPUShapesIndex(shapes=merged_shapes)

Expand Down
41 changes: 41 additions & 0 deletions ads/aqua/extension/deployment_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,16 @@ def get(self, id: Union[str, List[str]] = None):
return self.get_deployment_config(
model_id=id.split(",") if "," in id else id
)
elif paths.startswith("aqua/deployments/recommend_shapes"):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NIT: /recommended_shapes would be better.

id = id or self.get_argument("model_id", default=None)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need this? Looks like in case of the aqua/deployments/config we don't check the "model_id"?

if not id or not isinstance(id, str):
raise HTTPError(
400,
f"Invalid request format for {self.request.path}. "
"Expected a single model OCID",
)
id = id.replace(" ", "")
return self.get_recommend_shape(model_id=id)
elif paths.startswith("aqua/deployments/shapes"):
return self.list_shapes()
elif paths.startswith("aqua/deployments"):
Expand Down Expand Up @@ -161,6 +171,36 @@ def get_deployment_config(self, model_id: Union[str, List[str]]):

return self.finish(deployment_config)

def get_recommend_shape(self, model_id: str):
    """
    Retrieves the valid shape and deployment parameter configuration for one Aqua model.

    Parameters
    ----------
    model_id : str
        A single model OCID.

    Returns
    -------
    None
        Sends the ShapeRecommendationReport (generate_table=False) or a
        rich-diff Table (generate_table=True) to the client via `finish`.
    """
    app = AquaDeploymentApp()

    compartment_id = self.get_argument("compartment_id", default=COMPARTMENT_OCID)

    # Query-string flag; any casing of "true" enables the table output.
    # Defaults to True for backward compatibility with the CLI behavior.
    generate_table = (
        self.get_argument("generate_table", default="True").lower() == "true"
    )

    recommend_report = app.recommend_shape(
        model_id=model_id,
        compartment_id=compartment_id,
        generate_table=generate_table,
    )

    return self.finish(recommend_report)

def list_shapes(self):
"""
Lists the valid model deployment shapes.
Expand Down Expand Up @@ -408,6 +448,7 @@ def get(self, model_deployment_id):
("deployments/?([^/]*)/params", AquaDeploymentParamsHandler),
("deployments/config/?([^/]*)", AquaDeploymentHandler),
("deployments/shapes/?([^/]*)", AquaDeploymentHandler),
("deployments/recommend_shapes/?([^/]*)", AquaDeploymentHandler),
("deployments/?([^/]*)", AquaDeploymentHandler),
("deployments/?([^/]*)/activate", AquaDeploymentHandler),
("deployments/?([^/]*)/deactivate", AquaDeploymentHandler),
Expand Down
2 changes: 2 additions & 0 deletions ads/aqua/modeldeployment/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,5 @@

DEFAULT_WAIT_TIME = 12000
DEFAULT_POLL_INTERVAL = 10

SHAPE_MAP = {"NVIDIA_GPU": "GPU"}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just in case, here the full list of supported series.

    SHAPE_SERIES_AMD_ROME = "AMD_ROME"
    SHAPE_SERIES_INTEL_SKYLAKE = "INTEL_SKYLAKE"
    SHAPE_SERIES_NVIDIA_GPU = "NVIDIA_GPU"
    SHAPE_SERIES_GENERIC = "GENERIC"
    SHAPE_SERIES_LEGACY = "LEGACY"
    SHAPE_SERIES_ARM = "ARM"

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did research this- the issue is that I'm not 100% sure if these map to GPU/CPU types. For now, I will map all to CPU types except for NVIDIA_GPU since the AMD GPU shape (MX300) did not have the AMD_ROME for the shape_series parameter. I also did not see any of these series (except for NVIDIA_GPU) when we queried for GPU only shapes.

106 changes: 104 additions & 2 deletions ads/aqua/modeldeployment/deployment.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,12 @@
import shlex
import threading
from datetime import datetime, timedelta
from typing import Dict, List, Optional
from typing import Dict, List, Optional, Union

from cachetools import TTLCache, cached
from oci.data_science.models import ModelDeploymentShapeSummary
from pydantic import ValidationError
from rich.table import Table

from ads.aqua.app import AquaApp, logger
from ads.aqua.common.entities import (
Expand Down Expand Up @@ -63,14 +64,20 @@
ModelDeploymentConfigSummary,
MultiModelDeploymentConfigLoader,
)
from ads.aqua.modeldeployment.constants import DEFAULT_POLL_INTERVAL, DEFAULT_WAIT_TIME
from ads.aqua.modeldeployment.constants import (
DEFAULT_POLL_INTERVAL,
DEFAULT_WAIT_TIME,
SHAPE_MAP,
)
from ads.aqua.modeldeployment.entities import (
AquaDeployment,
AquaDeploymentDetail,
ConfigValidationError,
CreateModelDeploymentDetails,
)
from ads.aqua.modeldeployment.model_group_config import ModelGroupConfig
from ads.aqua.shaperecommend.recommend import AquaShapeRecommend
from ads.aqua.shaperecommend.shape_report import ShapeRecommendationReport
from ads.common.object_storage_details import ObjectStorageDetails
from ads.common.utils import UNKNOWN, get_log_links
from ads.common.work_request import DataScienceWorkRequest
Expand Down Expand Up @@ -1243,6 +1250,101 @@ def validate_deployment_params(
)
return {"valid": True}

def valid_compute_shapes(self, **kwargs) -> List["ComputeShapeSummary"]:
    """
    Returns the list of GPU ComputeShapeSummary objects from the GPU shapes
    index, flagging each as available/unavailable in the user's tenancy.

    Parameters
    ----------
    **kwargs
        compartment_id : str, optional
            Compartment to query; defaults to COMPARTMENT_OCID.
        Remaining keyword arguments are forwarded to
        `list_model_deployment_shapes`.

    Returns
    -------
    List[ComputeShapeSummary]
        GPU shapes sorted by GPU memory, largest first. Shapes present in
        the tenancy carry `available=True` plus core/memory details.
    """
    compartment_id = kwargs.pop("compartment_id", COMPARTMENT_OCID)
    oci_shapes: List[ModelDeploymentShapeSummary] = self.list_resource(
        self.ds_client.list_model_deployment_shapes,
        compartment_id=compartment_id,
        **kwargs,
    )
    set_user_shapes = {shape.name: shape for shape in oci_shapes}

    gpu_shapes_metadata = load_gpu_shapes_index().shapes

    valid_shapes = []
    # Only loops through GPU shapes; update later to include CPU shapes.
    # Recommendations are based on all possible shapes, not just those
    # currently available in the tenancy.
    for name, spec in gpu_shapes_metadata.items():
        oci_shape = set_user_shapes.get(name)
        if oci_shape is not None:
            compute_shape = ComputeShapeSummary(
                available=True,
                core_count=oci_shape.core_count,
                memory_in_gbs=oci_shape.memory_in_gbs,
                # Unknown series default to "GPU" since only GPU shapes
                # appear in the index today.
                shape_series=SHAPE_MAP.get(oci_shape.shape_series, "GPU"),
                name=oci_shape.name,
                gpu_specs=spec,
            )
        else:
            compute_shape = ComputeShapeSummary(
                available=False, name=name, shape_series="GPU", gpu_specs=spec
            )
        valid_shapes.append(compute_shape)

    # gpu_memory_in_gbs is Optional; treat missing values as 0 so the
    # sort cannot raise TypeError on a None key.
    valid_shapes.sort(
        key=lambda shape: shape.gpu_specs.gpu_memory_in_gbs or 0, reverse=True
    )
    return valid_shapes

def recommend_shape(self, **kwargs) -> Union[Table, ShapeRecommendationReport]:
    """
    Recommends feasible GPU deployment shapes for a model.

    For the CLI (generate_table=True), produces a rich-diff Table of valid
    GPU deployment shapes for the provided model and configuration. For the
    API (generate_table=False), produces the JSON-serializable report.

    Parameters
    ----------
    **kwargs
        model_id : str
            OCID of the model to recommend feasible compute shapes for.
        compartment_id : str, optional
            Compartment to query for available shapes; defaults to
            COMPARTMENT_OCID.
        generate_table : bool, optional
            Selects Table vs. report output (see above).

    Returns
    -------
    Table
        When generate_table=True: a table of compatible deployment shapes,
        or troubleshooting info citing the largest shapes if none fit.
    ShapeRecommendationReport
        When generate_table=False: the same content as a report object.

    Raises
    ------
    AquaValueError
        If the model type is unsupported by the tool (no report generated).
    """
    compartment_id = kwargs.get("compartment_id", COMPARTMENT_OCID)

    # Attach the tenancy's GPU shape inventory so the recommender can
    # rank every candidate shape.
    kwargs["shapes"] = self.valid_compute_shapes(compartment_id=compartment_id)

    return AquaShapeRecommend().which_shapes(**kwargs)

@telemetry(entry_point="plugin=deployment&action=list_shapes", name="aqua")
@cached(cache=TTLCache(maxsize=1, ttl=timedelta(minutes=5), timer=datetime.now))
def list_shapes(self, **kwargs) -> List[ComputeShapeSummary]:
Expand Down
Loading
Loading