Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
openstacksdk==4.2.0
prometheus_client==0.21.1
httpx==0.28.1
pytest<9.0.0,>=8.0.0
pytest-cov==6.0.0
pytest-xdist==3.5.0
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
openstacksdk==4.2.0
httpx==0.28.1
prometheus_client==0.21.1
46 changes: 26 additions & 20 deletions src/collectors/instances_per_hypervisor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,43 +3,49 @@
import time
import logging
from util import instances_per_hypervisor
import asyncio

logger = logging.getLogger(__name__)


class InstancesPerHypervisorCollector(Collector):
    """Prometheus collector exposing instance counts per OpenStack hypervisor.

    Metrics are cached for ``cache_time`` seconds so that frequent scrapes do
    not hammer the placement API on every request.
    """

    def __init__(self, cloud_name, cache_time):
        """
        Args:
            cloud_name: Name of the cloud entry (e.g. from clouds.yaml / OS_CLOUD).
            cache_time: Seconds to serve cached metrics before refetching.
        """
        super().__init__()
        self.cloud_name = cloud_name
        self.metrics = []  # retained for backward compatibility; no longer populated
        self.last_update = 0  # epoch seconds of the last successful fetch
        self.cache_time = cache_time
        self.gauge = None  # last successfully built GaugeMetricFamily

    def _fetch_metrics(self):
        """Fetch metrics from OpenStack and rebuild the gauge.

        On failure the previously published gauge and ``last_update`` are left
        untouched, so stale-but-valid metrics keep being served instead of an
        empty metric family; the next scrape will retry.
        """
        try:
            # Build into a local gauge first: publishing only on success means
            # a failed fetch cannot wipe the previously-good metrics.
            gauge = GaugeMetricFamily(
                "openstack_peepo_exporter_instances_per_hypervisor",
                "Number of instances per hypervisor",
                labels=["hypervisor_name", "hypervisor_id"],
            )

            new_metrics = asyncio.run(
                instances_per_hypervisor.export_metrics(self.cloud_name)
            )
            logger.info(f"Updated metrics with {len(new_metrics)} hypervisors")

            for metric in new_metrics:
                gauge.add_metric(
                    [metric["hypervisor_name"], metric["hypervisor_id"]],
                    metric["instance_count"],
                )

            # Publish atomically and record the refresh time.
            self.gauge = gauge
            self.last_update = time.time()
        except Exception as e:
            logger.error(f"Error updating metrics: {e}")

    def collect(self):
        """Yield the cached gauge, refreshing it when the cache has expired."""
        if time.time() - self.last_update > self.cache_time:
            self._fetch_metrics()

        # Nothing to report until the first successful fetch; yielding None
        # would break the registry's exposition.
        if self.gauge is not None:
            yield self.gauge
8 changes: 7 additions & 1 deletion src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,17 @@
parser.add_argument(
"--cloud", default=os.environ.get("OS_CLOUD"), help="The name of the cloud."
)
parser.add_argument(
"--cache-time",
default=os.environ.get("CACHE_TIME", 60),
type=int,
help="The time to cache the metrics in seconds.",
)
args = parser.parse_args()

# Register the collector
registry = CollectorRegistry()
registry.register(InstancesPerHypervisorCollector(args.cloud))
registry.register(InstancesPerHypervisorCollector(args.cloud, args.cache_time))

# Start the metrics server
start_http_server(args.port, args.addr, registry)
Expand Down
57 changes: 39 additions & 18 deletions src/util/instances_per_hypervisor.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,49 @@
import openstack
import httpx
import asyncio
import logging
import time

logger = logging.getLogger(__name__)

def export_metrics(cloud_name: str):

async def fetch_allocations(client, rp_id):
    """Fetch the allocations document for a single resource provider.

    Args:
        client: An async HTTP client whose base_url points at the placement
            endpoint (anything with an awaitable ``get`` returning a response).
        rp_id: Resource provider id to query.

    Returns:
        Tuple of ``(rp_id, parsed_json_body)`` so callers can correlate
        results after ``asyncio.gather``.

    Raises:
        httpx.HTTPStatusError: on 4xx/5xx responses, instead of silently
            parsing an error body as if it were an allocations document.
    """
    response = await client.get(f"/resource_providers/{rp_id}/allocations")
    # Without this, a 4xx/5xx error payload would be JSON-decoded and counted
    # as "no allocations", hiding failures from the caller.
    response.raise_for_status()
    return rp_id, response.json()


async def export_metrics(cloud_name: str):
    """Collect the number of instances allocated on each hypervisor.

    Connects to the named cloud, lists all placement resource providers, and
    fetches their allocation documents concurrently over the placement API.

    Args:
        cloud_name: Name of the cloud entry (e.g. from clouds.yaml / OS_CLOUD).

    Returns:
        A list of dicts with keys ``hypervisor_id``, ``hypervisor_name`` and
        ``instance_count`` (one entry per resource provider).
    """
    logger.info(f"exporting metrics for {cloud_name}")
    conn = openstack.connect(cloud=cloud_name)
    # Reuse the SDK session's auth token for the raw placement requests.
    headers = conn.session.get_auth_headers()

    placement_endpoint = conn.session.get_endpoint(
        service_type="placement", interface="public"
    )

    instance_counts = []
    async with httpx.AsyncClient(
        base_url=placement_endpoint, headers=headers
    ) as client:
        rps = list(conn.placement.resource_providers())
        tasks = [fetch_allocations(client, rp.id) for rp in rps]
        start_time = time.time()
        results = await asyncio.gather(*tasks)
        end_time = time.time()
        logger.info(
            f"Time taken to fetch {len(rps)} allocations: {end_time - start_time} seconds"
        )

        # fetch_allocations returns (rp_id, body); index once for O(1) lookup
        # instead of rescanning the results list for every resource provider.
        allocations_by_rp = dict(results)

        for rp in rps:
            allocations = allocations_by_rp.get(rp.id, {})
            # Each key under "allocations" is one consumer (instance).
            instance_count = len(allocations.get("allocations", {}))

            data = {
                "hypervisor_id": rp.id,
                "hypervisor_name": rp.name,
                "instance_count": instance_count,
            }
            instance_counts.append(data)
            logger.debug(f"Hypervisor: {rp.name}, Instances: {instance_count}")

    return instance_counts
Loading